blob: ad04cca7842ffeb41f1ebd0e1756e30675abcbfe [file] [log] [blame]
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001"""
2Read and write ZIP files.
Guido van Rossumd6ca5462007-05-22 01:29:33 +00003
4XXX references to utf-8 need further investigation.
Thomas Wouters0e3f5912006-08-11 14:57:12 +00005"""
Antoine Pitroua32f9a22010-01-27 21:18:57 +00006import io
Barry Warsaw28a691b2010-04-17 00:19:56 +00007import os
Antoine Pitroua32f9a22010-01-27 21:18:57 +00008import re
Barry Warsaw28a691b2010-04-17 00:19:56 +00009import imp
10import sys
11import time
12import stat
13import shutil
14import struct
15import binascii
16
Guido van Rossum32abe6f2000-03-31 17:30:02 +000017
18try:
Tim Peterse1190062001-01-15 03:34:38 +000019 import zlib # We may need its compression method
Christian Heimesd5e2b6f2008-03-19 21:50:51 +000020 crc32 = zlib.crc32
Guido van Rossum9c673f32001-04-10 15:37:12 +000021except ImportError:
Guido van Rossum32abe6f2000-03-31 17:30:02 +000022 zlib = None
Christian Heimesd5e2b6f2008-03-19 21:50:51 +000023 crc32 = binascii.crc32
Guido van Rossum32abe6f2000-03-31 17:30:02 +000024
Skip Montanaro40fc1602001-03-01 04:27:19 +000025__all__ = ["BadZipfile", "error", "ZIP_STORED", "ZIP_DEFLATED", "is_zipfile",
Thomas Wouters0e3f5912006-08-11 14:57:12 +000026 "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile" ]
Skip Montanaro40fc1602001-03-01 04:27:19 +000027
Fred Drake5db246d2000-09-29 20:44:48 +000028class BadZipfile(Exception):
Guido van Rossum32abe6f2000-03-31 17:30:02 +000029 pass
Thomas Wouters0e3f5912006-08-11 14:57:12 +000030
31
32class LargeZipFile(Exception):
33 """
34 Raised when writing a zipfile, the zipfile requires ZIP64 extensions
35 and those extensions are disabled.
36 """
37
Tim Peterse1190062001-01-15 03:34:38 +000038error = BadZipfile # The exception raised by this module
Guido van Rossum32abe6f2000-03-31 17:30:02 +000039
Amaury Forgeot d'Arc0c3f8a42009-01-17 16:42:26 +000040ZIP64_LIMIT = (1 << 31) - 1
Martin v. Löwisb09b8442008-07-03 14:13:42 +000041ZIP_FILECOUNT_LIMIT = 1 << 16
42ZIP_MAX_COMMENT = (1 << 16) - 1
Thomas Wouters0e3f5912006-08-11 14:57:12 +000043
Guido van Rossum32abe6f2000-03-31 17:30:02 +000044# constants for Zip file compression methods
45ZIP_STORED = 0
46ZIP_DEFLATED = 8
47# Other ZIP compression methods not supported
48
Martin v. Löwisb09b8442008-07-03 14:13:42 +000049# Below are some formats and associated data for reading/writing headers using
50# the struct module. The names and structures of headers/records are those used
51# in the PKWARE description of the ZIP file format:
52# http://www.pkware.com/documents/casestudies/APPNOTE.TXT
53# (URL valid as of January 2008)
Thomas Wouters0e3f5912006-08-11 14:57:12 +000054
Martin v. Löwisb09b8442008-07-03 14:13:42 +000055# The "end of central directory" structure, magic number, size, and indices
56# (section V.I in the format document)
Georg Brandl2ee470f2008-07-16 12:55:28 +000057structEndArchive = b"<4s4H2LH"
58stringEndArchive = b"PK\005\006"
59sizeEndCentDir = struct.calcsize(structEndArchive)
Martin v. Löwisb09b8442008-07-03 14:13:42 +000060
61_ECD_SIGNATURE = 0
62_ECD_DISK_NUMBER = 1
63_ECD_DISK_START = 2
64_ECD_ENTRIES_THIS_DISK = 3
65_ECD_ENTRIES_TOTAL = 4
66_ECD_SIZE = 5
67_ECD_OFFSET = 6
68_ECD_COMMENT_SIZE = 7
69# These last two indices are not part of the structure as defined in the
70# spec, but they are used internally by this module as a convenience
71_ECD_COMMENT = 8
72_ECD_LOCATION = 9
73
74# The "central directory" structure, magic number, size, and indices
75# of entries in the structure (section V.F in the format document)
76structCentralDir = "<4s4B4HL2L5H2L"
Georg Brandl2ee470f2008-07-16 12:55:28 +000077stringCentralDir = b"PK\001\002"
Martin v. Löwisb09b8442008-07-03 14:13:42 +000078sizeCentralDir = struct.calcsize(structCentralDir)
79
Fred Drake3e038e52001-02-28 17:56:26 +000080# indexes of entries in the central directory structure
81_CD_SIGNATURE = 0
82_CD_CREATE_VERSION = 1
83_CD_CREATE_SYSTEM = 2
84_CD_EXTRACT_VERSION = 3
Martin v. Löwisb09b8442008-07-03 14:13:42 +000085_CD_EXTRACT_SYSTEM = 4
Fred Drake3e038e52001-02-28 17:56:26 +000086_CD_FLAG_BITS = 5
87_CD_COMPRESS_TYPE = 6
88_CD_TIME = 7
89_CD_DATE = 8
90_CD_CRC = 9
91_CD_COMPRESSED_SIZE = 10
92_CD_UNCOMPRESSED_SIZE = 11
93_CD_FILENAME_LENGTH = 12
94_CD_EXTRA_FIELD_LENGTH = 13
95_CD_COMMENT_LENGTH = 14
96_CD_DISK_NUMBER_START = 15
97_CD_INTERNAL_FILE_ATTRIBUTES = 16
98_CD_EXTERNAL_FILE_ATTRIBUTES = 17
99_CD_LOCAL_HEADER_OFFSET = 18
100
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000101# The "local file header" structure, magic number, size, and indices
102# (section V.A in the format document)
103structFileHeader = "<4s2B4HL2L2H"
Georg Brandl2ee470f2008-07-16 12:55:28 +0000104stringFileHeader = b"PK\003\004"
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000105sizeFileHeader = struct.calcsize(structFileHeader)
106
Fred Drake3e038e52001-02-28 17:56:26 +0000107_FH_SIGNATURE = 0
108_FH_EXTRACT_VERSION = 1
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000109_FH_EXTRACT_SYSTEM = 2
Fred Drake3e038e52001-02-28 17:56:26 +0000110_FH_GENERAL_PURPOSE_FLAG_BITS = 3
111_FH_COMPRESSION_METHOD = 4
112_FH_LAST_MOD_TIME = 5
113_FH_LAST_MOD_DATE = 6
114_FH_CRC = 7
115_FH_COMPRESSED_SIZE = 8
116_FH_UNCOMPRESSED_SIZE = 9
117_FH_FILENAME_LENGTH = 10
118_FH_EXTRA_FIELD_LENGTH = 11
119
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000120# The "Zip64 end of central directory locator" structure, magic number, and size
Georg Brandl2ee470f2008-07-16 12:55:28 +0000121structEndArchive64Locator = "<4sLQL"
122stringEndArchive64Locator = b"PK\x06\x07"
123sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator)
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000124
125# The "Zip64 end of central directory" record, magic number, size, and indices
126# (section V.G in the format document)
Georg Brandl2ee470f2008-07-16 12:55:28 +0000127structEndArchive64 = "<4sQ2H2L4Q"
128stringEndArchive64 = b"PK\x06\x06"
129sizeEndCentDir64 = struct.calcsize(structEndArchive64)
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000130
131_CD64_SIGNATURE = 0
132_CD64_DIRECTORY_RECSIZE = 1
133_CD64_CREATE_VERSION = 2
134_CD64_EXTRACT_VERSION = 3
135_CD64_DISK_NUMBER = 4
136_CD64_DISK_NUMBER_START = 5
137_CD64_NUMBER_ENTRIES_THIS_DISK = 6
138_CD64_NUMBER_ENTRIES_TOTAL = 7
139_CD64_DIRECTORY_SIZE = 8
140_CD64_OFFSET_START_CENTDIR = 9
141
Antoine Pitroudb5fe662008-12-27 15:50:40 +0000142def _check_zipfile(fp):
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000143 try:
Antoine Pitroudb5fe662008-12-27 15:50:40 +0000144 if _EndRecData(fp):
145 return True # file has correct magic number
Fred Drake7e473802001-05-11 19:52:57 +0000146 except IOError:
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000147 pass
Guido van Rossum8ca162f2002-04-07 06:36:23 +0000148 return False
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000149
Antoine Pitroudb5fe662008-12-27 15:50:40 +0000150def is_zipfile(filename):
151 """Quickly see if a file is a ZIP file by checking the magic number.
152
153 The filename argument may be a file or file-like object too.
154 """
155 result = False
156 try:
157 if hasattr(filename, "read"):
158 result = _check_zipfile(fp=filename)
159 else:
160 with open(filename, "rb") as fp:
161 result = _check_zipfile(fp)
162 except IOError:
163 pass
164 return result
165
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000166def _EndRecData64(fpin, offset, endrec):
167 """
168 Read the ZIP64 end-of-archive records and use that to update endrec
169 """
Georg Brandl268e4d42010-10-14 06:59:45 +0000170 try:
171 fpin.seek(offset - sizeEndCentDir64Locator, 2)
172 except IOError:
173 # If the seek fails, the file is not large enough to contain a ZIP64
174 # end-of-archive record, so just return the end record we were given.
175 return endrec
176
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000177 data = fpin.read(sizeEndCentDir64Locator)
Georg Brandl2ee470f2008-07-16 12:55:28 +0000178 sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, data)
179 if sig != stringEndArchive64Locator:
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000180 return endrec
181
182 if diskno != 0 or disks != 1:
183 raise BadZipfile("zipfiles that span multiple disks are not supported")
184
185 # Assume no 'zip64 extensible data'
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000186 fpin.seek(offset - sizeEndCentDir64Locator - sizeEndCentDir64, 2)
187 data = fpin.read(sizeEndCentDir64)
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000188 sig, sz, create_version, read_version, disk_num, disk_dir, \
189 dircount, dircount2, dirsize, diroffset = \
Georg Brandl2ee470f2008-07-16 12:55:28 +0000190 struct.unpack(structEndArchive64, data)
191 if sig != stringEndArchive64:
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000192 return endrec
193
194 # Update the original endrec using data from the ZIP64 record
Antoine Pitrou9e4fdf42008-09-05 23:43:02 +0000195 endrec[_ECD_SIGNATURE] = sig
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000196 endrec[_ECD_DISK_NUMBER] = disk_num
197 endrec[_ECD_DISK_START] = disk_dir
198 endrec[_ECD_ENTRIES_THIS_DISK] = dircount
199 endrec[_ECD_ENTRIES_TOTAL] = dircount2
200 endrec[_ECD_SIZE] = dirsize
201 endrec[_ECD_OFFSET] = diroffset
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000202 return endrec
203
204
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000205def _EndRecData(fpin):
206 """Return data from the "End of Central Directory" record, or None.
207
208 The data is a list of the nine items in the ZIP "End of central dir"
209 record followed by a tenth item, the file seek offset of this record."""
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000210
211 # Determine file size
212 fpin.seek(0, 2)
213 filesize = fpin.tell()
214
215 # Check to see if this is ZIP file with no archive comment (the
216 # "end of central directory" structure should be the last item in the
217 # file if this is the case).
Amaury Forgeot d'Arcbc347802009-07-28 22:18:57 +0000218 try:
219 fpin.seek(-sizeEndCentDir, 2)
220 except IOError:
221 return None
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000222 data = fpin.read()
Georg Brandl2ee470f2008-07-16 12:55:28 +0000223 if data[0:4] == stringEndArchive and data[-2:] == b"\000\000":
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000224 # the signature is correct and there's no comment, unpack structure
Georg Brandl2ee470f2008-07-16 12:55:28 +0000225 endrec = struct.unpack(structEndArchive, data)
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000226 endrec=list(endrec)
227
228 # Append a blank comment and record start offset
229 endrec.append(b"")
230 endrec.append(filesize - sizeEndCentDir)
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000231
Amaury Forgeot d'Arcd3fb4bb2009-01-18 00:29:02 +0000232 # Try to read the "Zip64 end of central directory" structure
233 return _EndRecData64(fpin, -sizeEndCentDir, endrec)
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000234
235 # Either this is not a ZIP file, or it is a ZIP file with an archive
236 # comment. Search the end of the file for the "end of central directory"
237 # record signature. The comment is the last item in the ZIP file and may be
238 # up to 64K long. It is assumed that the "end of central directory" magic
239 # number does not appear in the comment.
240 maxCommentStart = max(filesize - (1 << 16) - sizeEndCentDir, 0)
241 fpin.seek(maxCommentStart, 0)
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000242 data = fpin.read()
Georg Brandl2ee470f2008-07-16 12:55:28 +0000243 start = data.rfind(stringEndArchive)
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000244 if start >= 0:
245 # found the magic number; attempt to unpack and interpret
246 recData = data[start:start+sizeEndCentDir]
Georg Brandl2ee470f2008-07-16 12:55:28 +0000247 endrec = list(struct.unpack(structEndArchive, recData))
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000248 comment = data[start+sizeEndCentDir:]
249 # check that comment length is correct
250 if endrec[_ECD_COMMENT_SIZE] == len(comment):
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000251 # Append the archive comment and start offset
252 endrec.append(comment)
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000253 endrec.append(maxCommentStart + start)
Amaury Forgeot d'Arcd3fb4bb2009-01-18 00:29:02 +0000254
255 # Try to read the "Zip64 end of central directory" structure
256 return _EndRecData64(fpin, maxCommentStart + start - filesize,
257 endrec)
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000258
259 # Unable to find a valid end of central directory structure
260 return
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000261
Fred Drake484d7352000-10-02 21:14:52 +0000262
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000263class ZipInfo (object):
Fred Drake484d7352000-10-02 21:14:52 +0000264 """Class with attributes describing each file in the ZIP archive."""
265
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000266 __slots__ = (
267 'orig_filename',
268 'filename',
269 'date_time',
270 'compress_type',
271 'comment',
272 'extra',
273 'create_system',
274 'create_version',
275 'extract_version',
276 'reserved',
277 'flag_bits',
278 'volume',
279 'internal_attr',
280 'external_attr',
281 'header_offset',
282 'CRC',
283 'compress_size',
284 'file_size',
Christian Heimesfdab48e2008-01-20 09:06:41 +0000285 '_raw_time',
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000286 )
287
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000288 def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
Greg Ward8e36d282003-06-18 00:53:06 +0000289 self.orig_filename = filename # Original file name in archive
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000290
291 # Terminate the file name at the first null byte. Null bytes in file
292 # names are used as tricks by viruses in archives.
Greg Ward8e36d282003-06-18 00:53:06 +0000293 null_byte = filename.find(chr(0))
294 if null_byte >= 0:
295 filename = filename[0:null_byte]
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000296 # This is used to ensure paths in generated ZIP files always use
297 # forward slashes as the directory separator, as required by the
298 # ZIP format specification.
299 if os.sep != "/" and os.sep in filename:
Greg Ward8e36d282003-06-18 00:53:06 +0000300 filename = filename.replace(os.sep, "/")
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000301
Greg Ward8e36d282003-06-18 00:53:06 +0000302 self.filename = filename # Normalized file name
Tim Peterse1190062001-01-15 03:34:38 +0000303 self.date_time = date_time # year, month, day, hour, min, sec
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000304 # Standard values:
Tim Peterse1190062001-01-15 03:34:38 +0000305 self.compress_type = ZIP_STORED # Type of compression for the file
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000306 self.comment = b"" # Comment for each file
307 self.extra = b"" # ZIP extra data
Martin v. Löwis00756902006-02-05 17:09:41 +0000308 if sys.platform == 'win32':
309 self.create_system = 0 # System which created ZIP archive
310 else:
311 # Assume everything else is unix-y
312 self.create_system = 3 # System which created ZIP archive
Tim Peterse1190062001-01-15 03:34:38 +0000313 self.create_version = 20 # Version which created ZIP archive
314 self.extract_version = 20 # Version needed to extract archive
315 self.reserved = 0 # Must be zero
316 self.flag_bits = 0 # ZIP flag bits
317 self.volume = 0 # Volume number of file header
318 self.internal_attr = 0 # Internal attributes
319 self.external_attr = 0 # External file attributes
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000320 # Other attributes are set by class ZipFile:
Tim Peterse1190062001-01-15 03:34:38 +0000321 # header_offset Byte offset to the file header
Tim Peterse1190062001-01-15 03:34:38 +0000322 # CRC CRC-32 of the uncompressed file
323 # compress_size Size of the compressed file
324 # file_size Size of the uncompressed file
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000325
326 def FileHeader(self):
Fred Drake484d7352000-10-02 21:14:52 +0000327 """Return the per-file header as a string."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000328 dt = self.date_time
329 dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
Tim Peters3caca232001-12-06 06:23:26 +0000330 dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000331 if self.flag_bits & 0x08:
Tim Peterse1190062001-01-15 03:34:38 +0000332 # Set these to zero because we write them after the file data
333 CRC = compress_size = file_size = 0
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000334 else:
Tim Peterse1190062001-01-15 03:34:38 +0000335 CRC = self.CRC
336 compress_size = self.compress_size
337 file_size = self.file_size
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000338
339 extra = self.extra
340
341 if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
342 # File is larger than what fits into a 4 byte integer,
343 # fall back to the ZIP64 extension
Christian Heimesd5e2b6f2008-03-19 21:50:51 +0000344 fmt = '<HHQQ'
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000345 extra = extra + struct.pack(fmt,
346 1, struct.calcsize(fmt)-4, file_size, compress_size)
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000347 file_size = 0xffffffff
348 compress_size = 0xffffffff
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000349 self.extract_version = max(45, self.extract_version)
350 self.create_version = max(45, self.extract_version)
351
Martin v. Löwis8570f6a2008-05-05 17:44:38 +0000352 filename, flag_bits = self._encodeFilenameFlags()
Georg Brandl2ee470f2008-07-16 12:55:28 +0000353 header = struct.pack(structFileHeader, stringFileHeader,
Martin v. Löwis8570f6a2008-05-05 17:44:38 +0000354 self.extract_version, self.reserved, flag_bits,
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000355 self.compress_type, dostime, dosdate, CRC,
356 compress_size, file_size,
Martin v. Löwis8570f6a2008-05-05 17:44:38 +0000357 len(filename), len(extra))
358 return header + filename + extra
359
360 def _encodeFilenameFlags(self):
361 try:
362 return self.filename.encode('ascii'), self.flag_bits
363 except UnicodeEncodeError:
364 return self.filename.encode('utf-8'), self.flag_bits | 0x800
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000365
366 def _decodeExtra(self):
367 # Try to decode the extra field.
368 extra = self.extra
369 unpack = struct.unpack
370 while extra:
Christian Heimesd5e2b6f2008-03-19 21:50:51 +0000371 tp, ln = unpack('<HH', extra[:4])
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000372 if tp == 1:
373 if ln >= 24:
Christian Heimesd5e2b6f2008-03-19 21:50:51 +0000374 counts = unpack('<QQQ', extra[4:28])
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000375 elif ln == 16:
Christian Heimesd5e2b6f2008-03-19 21:50:51 +0000376 counts = unpack('<QQ', extra[4:20])
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000377 elif ln == 8:
Christian Heimesd5e2b6f2008-03-19 21:50:51 +0000378 counts = unpack('<Q', extra[4:12])
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000379 elif ln == 0:
380 counts = ()
381 else:
Collin Winterce36ad82007-08-30 01:19:48 +0000382 raise RuntimeError("Corrupt extra field %s"%(ln,))
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000383
384 idx = 0
385
386 # ZIP64 extension (large files and/or large archives)
Christian Heimesd5e2b6f2008-03-19 21:50:51 +0000387 if self.file_size in (0xffffffffffffffff, 0xffffffff):
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000388 self.file_size = counts[idx]
389 idx += 1
390
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000391 if self.compress_size == 0xFFFFFFFF:
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000392 self.compress_size = counts[idx]
393 idx += 1
394
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000395 if self.header_offset == 0xffffffff:
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000396 old = self.header_offset
397 self.header_offset = counts[idx]
398 idx+=1
399
400 extra = extra[ln+4:]
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000401
402
Thomas Wouterscf297e42007-02-23 15:07:44 +0000403class _ZipDecrypter:
404 """Class to handle decryption of files stored within a ZIP archive.
405
406 ZIP supports a password-based form of encryption. Even though known
407 plaintext attacks have been found against it, it is still useful
Christian Heimesfdab48e2008-01-20 09:06:41 +0000408 to be able to get data out of such a file.
Thomas Wouterscf297e42007-02-23 15:07:44 +0000409
410 Usage:
411 zd = _ZipDecrypter(mypwd)
412 plain_char = zd(cypher_char)
413 plain_text = map(zd, cypher_text)
414 """
415
416 def _GenerateCRCTable():
417 """Generate a CRC-32 table.
418
419 ZIP encryption uses the CRC32 one-byte primitive for scrambling some
420 internal keys. We noticed that a direct implementation is faster than
421 relying on binascii.crc32().
422 """
423 poly = 0xedb88320
424 table = [0] * 256
425 for i in range(256):
426 crc = i
427 for j in range(8):
428 if crc & 1:
429 crc = ((crc >> 1) & 0x7FFFFFFF) ^ poly
430 else:
431 crc = ((crc >> 1) & 0x7FFFFFFF)
432 table[i] = crc
433 return table
434 crctable = _GenerateCRCTable()
435
436 def _crc32(self, ch, crc):
437 """Compute the CRC32 primitive on one byte."""
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000438 return ((crc >> 8) & 0xffffff) ^ self.crctable[(crc ^ ch) & 0xff]
Thomas Wouterscf297e42007-02-23 15:07:44 +0000439
440 def __init__(self, pwd):
441 self.key0 = 305419896
442 self.key1 = 591751049
443 self.key2 = 878082192
444 for p in pwd:
445 self._UpdateKeys(p)
446
447 def _UpdateKeys(self, c):
448 self.key0 = self._crc32(c, self.key0)
449 self.key1 = (self.key1 + (self.key0 & 255)) & 4294967295
450 self.key1 = (self.key1 * 134775813 + 1) & 4294967295
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000451 self.key2 = self._crc32((self.key1 >> 24) & 255, self.key2)
Thomas Wouterscf297e42007-02-23 15:07:44 +0000452
453 def __call__(self, c):
454 """Decrypt a single character."""
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000455 assert isinstance(c, int)
Thomas Wouterscf297e42007-02-23 15:07:44 +0000456 k = self.key2 | 2
457 c = c ^ (((k * (k^1)) >> 8) & 255)
Thomas Wouterscf297e42007-02-23 15:07:44 +0000458 self._UpdateKeys(c)
459 return c
460
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000461class ZipExtFile(io.BufferedIOBase):
Guido van Rossumd8faa362007-04-27 19:54:29 +0000462 """File-like object for reading an archive member.
463 Is returned by ZipFile.open().
464 """
465
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000466 # Max size supported by decompressor.
467 MAX_N = 1 << 31 - 1
Guido van Rossumd8faa362007-04-27 19:54:29 +0000468
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000469 # Read from compressed files in 4k blocks.
470 MIN_READ_SIZE = 4096
Guido van Rossumd8faa362007-04-27 19:54:29 +0000471
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000472 # Search for universal newlines or line chunks.
473 PATTERN = re.compile(br'^(?P<chunk>[^\r\n]+)|(?P<newline>\n|\r\n?)')
474
475 def __init__(self, fileobj, mode, zipinfo, decrypter=None):
476 self._fileobj = fileobj
477 self._decrypter = decrypter
478
Ezio Melotti92b47432010-01-28 01:44:41 +0000479 self._compress_type = zipinfo.compress_type
480 self._compress_size = zipinfo.compress_size
481 self._compress_left = zipinfo.compress_size
482
483 if self._compress_type == ZIP_DEFLATED:
484 self._decompressor = zlib.decompressobj(-15)
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000485 self._unconsumed = b''
486
487 self._readbuffer = b''
488 self._offset = 0
489
490 self._universal = 'U' in mode
491 self.newlines = None
492
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000493 # Adjust read size for encrypted files since the first 12 bytes
494 # are for the encryption/password information.
495 if self._decrypter is not None:
496 self._compress_left -= 12
497
498 self.mode = mode
Guido van Rossumd8faa362007-04-27 19:54:29 +0000499 self.name = zipinfo.filename
500
Antoine Pitrou7c8bcb62010-08-12 15:11:50 +0000501 if hasattr(zipinfo, 'CRC'):
502 self._expected_crc = zipinfo.CRC
503 self._running_crc = crc32(b'') & 0xffffffff
504 else:
505 self._expected_crc = None
506
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000507 def readline(self, limit=-1):
508 """Read and return a line from the stream.
Guido van Rossumd8faa362007-04-27 19:54:29 +0000509
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000510 If limit is specified, at most limit bytes will be read.
Guido van Rossumd8faa362007-04-27 19:54:29 +0000511 """
Guido van Rossumd8faa362007-04-27 19:54:29 +0000512
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000513 if not self._universal and limit < 0:
514 # Shortcut common case - newline found in buffer.
515 i = self._readbuffer.find(b'\n', self._offset) + 1
516 if i > 0:
517 line = self._readbuffer[self._offset: i]
518 self._offset = i
519 return line
Guido van Rossumd8faa362007-04-27 19:54:29 +0000520
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000521 if not self._universal:
522 return io.BufferedIOBase.readline(self, limit)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000523
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000524 line = b''
525 while limit < 0 or len(line) < limit:
526 readahead = self.peek(2)
527 if readahead == b'':
528 return line
Guido van Rossumd8faa362007-04-27 19:54:29 +0000529
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000530 #
531 # Search for universal newlines or line chunks.
532 #
533 # The pattern returns either a line chunk or a newline, but not
534 # both. Combined with peek(2), we are assured that the sequence
535 # '\r\n' is always retrieved completely and never split into
536 # separate newlines - '\r', '\n' due to coincidental readaheads.
537 #
538 match = self.PATTERN.search(readahead)
539 newline = match.group('newline')
540 if newline is not None:
541 if self.newlines is None:
542 self.newlines = []
543 if newline not in self.newlines:
544 self.newlines.append(newline)
545 self._offset += len(newline)
546 return line + b'\n'
Guido van Rossumd8faa362007-04-27 19:54:29 +0000547
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000548 chunk = match.group('chunk')
549 if limit >= 0:
550 chunk = chunk[: limit - len(line)]
Guido van Rossumd8faa362007-04-27 19:54:29 +0000551
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000552 self._offset += len(chunk)
553 line += chunk
Guido van Rossumd8faa362007-04-27 19:54:29 +0000554
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000555 return line
556
557 def peek(self, n=1):
558 """Returns buffered bytes without advancing the position."""
559 if n > len(self._readbuffer) - self._offset:
560 chunk = self.read(n)
561 self._offset -= len(chunk)
562
563 # Return up to 512 bytes to reduce allocation overhead for tight loops.
564 return self._readbuffer[self._offset: self._offset + 512]
565
566 def readable(self):
567 return True
568
569 def read(self, n=-1):
570 """Read and return up to n bytes.
571 If the argument is omitted, None, or negative, data is read and returned until EOF is reached..
Guido van Rossumd8faa362007-04-27 19:54:29 +0000572 """
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000573 buf = b''
Antoine Pitrou6464d5f2010-09-12 14:51:20 +0000574 if n is None:
575 n = -1
576 while True:
577 if n < 0:
578 data = self.read1(n)
579 elif n > len(buf):
580 data = self.read1(n - len(buf))
581 else:
582 return buf
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000583 if len(data) == 0:
584 return buf
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000585 buf += data
Guido van Rossumd8faa362007-04-27 19:54:29 +0000586
Antoine Pitrou7c8bcb62010-08-12 15:11:50 +0000587 def _update_crc(self, newdata, eof):
588 # Update the CRC using the given data.
589 if self._expected_crc is None:
590 # No need to compute the CRC if we don't have a reference value
591 return
592 self._running_crc = crc32(newdata, self._running_crc) & 0xffffffff
593 # Check the CRC if we're at the end of the file
594 if eof and self._running_crc != self._expected_crc:
595 raise BadZipfile("Bad CRC-32 for file %r" % self.name)
596
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000597 def read1(self, n):
598 """Read up to n bytes with at most one read() system call."""
Guido van Rossumd8faa362007-04-27 19:54:29 +0000599
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000600 # Simplify algorithm (branching) by transforming negative n to large n.
601 if n < 0 or n is None:
602 n = self.MAX_N
Guido van Rossumd8faa362007-04-27 19:54:29 +0000603
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000604 # Bytes available in read buffer.
605 len_readbuffer = len(self._readbuffer) - self._offset
Guido van Rossumd8faa362007-04-27 19:54:29 +0000606
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000607 # Read from file.
608 if self._compress_left > 0 and n > len_readbuffer + len(self._unconsumed):
609 nbytes = n - len_readbuffer - len(self._unconsumed)
610 nbytes = max(nbytes, self.MIN_READ_SIZE)
611 nbytes = min(nbytes, self._compress_left)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000612
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000613 data = self._fileobj.read(nbytes)
614 self._compress_left -= len(data)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000615
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000616 if data and self._decrypter is not None:
617 data = bytes(map(self._decrypter, data))
Guido van Rossumd8faa362007-04-27 19:54:29 +0000618
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000619 if self._compress_type == ZIP_STORED:
Antoine Pitrou7c8bcb62010-08-12 15:11:50 +0000620 self._update_crc(data, eof=(self._compress_left==0))
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000621 self._readbuffer = self._readbuffer[self._offset:] + data
622 self._offset = 0
623 else:
624 # Prepare deflated bytes for decompression.
625 self._unconsumed += data
Guido van Rossumd8faa362007-04-27 19:54:29 +0000626
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000627 # Handle unconsumed data.
Ezio Melotti92b47432010-01-28 01:44:41 +0000628 if (len(self._unconsumed) > 0 and n > len_readbuffer and
629 self._compress_type == ZIP_DEFLATED):
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000630 data = self._decompressor.decompress(
631 self._unconsumed,
632 max(n - len_readbuffer, self.MIN_READ_SIZE)
633 )
Guido van Rossumd8faa362007-04-27 19:54:29 +0000634
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000635 self._unconsumed = self._decompressor.unconsumed_tail
Antoine Pitrou7c8bcb62010-08-12 15:11:50 +0000636 eof = len(self._unconsumed) == 0 and self._compress_left == 0
637 if eof:
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000638 data += self._decompressor.flush()
Guido van Rossumd8faa362007-04-27 19:54:29 +0000639
Antoine Pitrou7c8bcb62010-08-12 15:11:50 +0000640 self._update_crc(data, eof=eof)
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000641 self._readbuffer = self._readbuffer[self._offset:] + data
642 self._offset = 0
643
644 # Read from buffer.
645 data = self._readbuffer[self._offset: self._offset + n]
646 self._offset += len(data)
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000647 return data
Guido van Rossumd8faa362007-04-27 19:54:29 +0000648
649
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000650
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000651class ZipFile:
Tim Petersa19a1682001-03-29 04:36:09 +0000652 """ Class with methods to open, read, write, close, list zip files.
653
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000654 z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=False)
Tim Petersa19a1682001-03-29 04:36:09 +0000655
Fred Drake3d9091e2001-03-26 15:49:24 +0000656 file: Either the path to the file, or a file-like object.
657 If it is a path, the file will be opened and closed by ZipFile.
658 mode: The mode can be either read "r", write "w" or append "a".
659 compression: ZIP_STORED (no compression) or ZIP_DEFLATED (requires zlib).
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000660 allowZip64: if True ZipFile will create files with ZIP64 extensions when
661 needed, otherwise it will raise an exception when this would
662 be necessary.
663
Fred Drake3d9091e2001-03-26 15:49:24 +0000664 """
Fred Drake484d7352000-10-02 21:14:52 +0000665
Fred Drake90eac282001-02-28 05:29:34 +0000666 fp = None # Set here since __del__ checks it
667
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000668 def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=False):
Fred Drake484d7352000-10-02 21:14:52 +0000669 """Open the ZIP file with mode read "r", write "w" or append "a"."""
Guido van Rossumb5a755e2007-07-18 18:15:48 +0000670 if mode not in ("r", "w", "a"):
671 raise RuntimeError('ZipFile() requires mode "r", "w", or "a"')
672
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000673 if compression == ZIP_STORED:
674 pass
675 elif compression == ZIP_DEFLATED:
676 if not zlib:
Collin Winterce36ad82007-08-30 01:19:48 +0000677 raise RuntimeError(
678 "Compression requires the (missing) zlib module")
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000679 else:
Collin Winterce36ad82007-08-30 01:19:48 +0000680 raise RuntimeError("That compression method is not supported")
Guido van Rossumb5a755e2007-07-18 18:15:48 +0000681
682 self._allowZip64 = allowZip64
683 self._didModify = False
Tim Peterse1190062001-01-15 03:34:38 +0000684 self.debug = 0 # Level of printing: 0 through 3
685 self.NameToInfo = {} # Find file info given name
686 self.filelist = [] # List of ZipInfo instances for archive
687 self.compression = compression # Method of compression
Raymond Hettinger2ca7c192005-02-16 09:27:49 +0000688 self.mode = key = mode.replace('b', '')[0]
Thomas Wouterscf297e42007-02-23 15:07:44 +0000689 self.pwd = None
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000690 self.comment = b''
Tim Petersa19a1682001-03-29 04:36:09 +0000691
Fred Drake3d9091e2001-03-26 15:49:24 +0000692 # Check if we were passed a file-like object
Guido van Rossum3172c5d2007-10-16 18:12:55 +0000693 if isinstance(file, str):
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000694 # No, it's a filename
Fred Drake3d9091e2001-03-26 15:49:24 +0000695 self._filePassed = 0
696 self.filename = file
697 modeDict = {'r' : 'rb', 'w': 'wb', 'a' : 'r+b'}
Thomas Wouterscf297e42007-02-23 15:07:44 +0000698 try:
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000699 self.fp = io.open(file, modeDict[mode])
Thomas Wouterscf297e42007-02-23 15:07:44 +0000700 except IOError:
701 if mode == 'a':
702 mode = key = 'w'
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000703 self.fp = io.open(file, modeDict[mode])
Thomas Wouterscf297e42007-02-23 15:07:44 +0000704 else:
705 raise
Fred Drake3d9091e2001-03-26 15:49:24 +0000706 else:
707 self._filePassed = 1
708 self.fp = file
709 self.filename = getattr(file, 'name', None)
Tim Petersa19a1682001-03-29 04:36:09 +0000710
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000711 if key == 'r':
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000712 self._GetContents()
713 elif key == 'w':
Georg Brandl268e4d42010-10-14 06:59:45 +0000714 # set the modified flag so central directory gets written
715 # even if no files are added to the archive
716 self._didModify = True
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000717 elif key == 'a':
Georg Brandl268e4d42010-10-14 06:59:45 +0000718 try:
719 # See if file is a zip file
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000720 self._RealGetContents()
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000721 # seek to start of directory and overwrite
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000722 self.fp.seek(self.start_dir, 0)
Georg Brandl268e4d42010-10-14 06:59:45 +0000723 except BadZipfile:
724 # file is not a zip file, just append
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000725 self.fp.seek(0, 2)
Georg Brandl268e4d42010-10-14 06:59:45 +0000726
727 # set the modified flag so central directory gets written
728 # even if no files are added to the archive
729 self._didModify = True
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000730 else:
Tim Peters7d3bad62001-04-04 18:56:49 +0000731 if not self._filePassed:
732 self.fp.close()
733 self.fp = None
Collin Winterce36ad82007-08-30 01:19:48 +0000734 raise RuntimeError('Mode must be "r", "w" or "a"')
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000735
Ezio Melottifaa6b7f2009-12-30 12:34:59 +0000736 def __enter__(self):
737 return self
738
739 def __exit__(self, type, value, traceback):
740 self.close()
741
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000742 def _GetContents(self):
Tim Peters7d3bad62001-04-04 18:56:49 +0000743 """Read the directory, making sure we close the file if the format
744 is bad."""
745 try:
746 self._RealGetContents()
747 except BadZipfile:
748 if not self._filePassed:
749 self.fp.close()
750 self.fp = None
751 raise
752
753 def _RealGetContents(self):
Fred Drake484d7352000-10-02 21:14:52 +0000754 """Read in the table of contents for the ZIP file."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000755 fp = self.fp
Georg Brandl268e4d42010-10-14 06:59:45 +0000756 try:
757 endrec = _EndRecData(fp)
758 except IOError:
759 raise BadZipfile("File is not a zip file")
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000760 if not endrec:
Collin Winterce36ad82007-08-30 01:19:48 +0000761 raise BadZipfile("File is not a zip file")
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000762 if self.debug > 1:
Guido van Rossumbe19ed72007-02-09 05:37:30 +0000763 print(endrec)
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000764 size_cd = endrec[_ECD_SIZE] # bytes in central directory
765 offset_cd = endrec[_ECD_OFFSET] # offset of central directory
766 self.comment = endrec[_ECD_COMMENT] # archive comment
767
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000768 # "concat" is zero, unless zip was concatenated to another file
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000769 concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
Antoine Pitrou9e4fdf42008-09-05 23:43:02 +0000770 if endrec[_ECD_SIGNATURE] == stringEndArchive64:
771 # If Zip64 extension structures are present, account for them
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000772 concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)
773
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000774 if self.debug > 2:
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000775 inferred = concat + offset_cd
776 print("given, inferred, offset", offset_cd, inferred, concat)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000777 # self.start_dir: Position of start of central directory
778 self.start_dir = offset_cd + concat
779 fp.seek(self.start_dir, 0)
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000780 data = fp.read(size_cd)
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000781 fp = io.BytesIO(data)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000782 total = 0
783 while total < size_cd:
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000784 centdir = fp.read(sizeCentralDir)
Georg Brandl2ee470f2008-07-16 12:55:28 +0000785 if centdir[0:4] != stringCentralDir:
Collin Winterce36ad82007-08-30 01:19:48 +0000786 raise BadZipfile("Bad magic number for central directory")
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000787 centdir = struct.unpack(structCentralDir, centdir)
788 if self.debug > 2:
Guido van Rossumbe19ed72007-02-09 05:37:30 +0000789 print(centdir)
Fred Drake3e038e52001-02-28 17:56:26 +0000790 filename = fp.read(centdir[_CD_FILENAME_LENGTH])
Martin v. Löwis8570f6a2008-05-05 17:44:38 +0000791 flags = centdir[5]
792 if flags & 0x800:
793 # UTF-8 file names extension
794 filename = filename.decode('utf-8')
795 else:
796 # Historical ZIP filename encoding
797 filename = filename.decode('cp437')
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000798 # Create ZipInfo instance to store file information
Martin v. Löwis8570f6a2008-05-05 17:44:38 +0000799 x = ZipInfo(filename)
Fred Drake3e038e52001-02-28 17:56:26 +0000800 x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
801 x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000802 x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000803 (x.create_version, x.create_system, x.extract_version, x.reserved,
804 x.flag_bits, x.compress_type, t, d,
805 x.CRC, x.compress_size, x.file_size) = centdir[1:12]
806 x.volume, x.internal_attr, x.external_attr = centdir[15:18]
807 # Convert date/time code to (year, month, day, hour, min, sec)
Christian Heimesfdab48e2008-01-20 09:06:41 +0000808 x._raw_time = t
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000809 x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
Fred Drake414ca662000-06-13 18:49:53 +0000810 t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000811
812 x._decodeExtra()
813 x.header_offset = x.header_offset + concat
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000814 self.filelist.append(x)
815 self.NameToInfo[x.filename] = x
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000816
817 # update total bytes read from central directory
818 total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH]
819 + centdir[_CD_EXTRA_FIELD_LENGTH]
820 + centdir[_CD_COMMENT_LENGTH])
821
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000822 if self.debug > 2:
Guido van Rossumbe19ed72007-02-09 05:37:30 +0000823 print("total", total)
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000824
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000825
826 def namelist(self):
Fred Drake484d7352000-10-02 21:14:52 +0000827 """Return a list of file names in the archive."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000828 l = []
829 for data in self.filelist:
830 l.append(data.filename)
831 return l
832
833 def infolist(self):
Fred Drake484d7352000-10-02 21:14:52 +0000834 """Return a list of class ZipInfo instances for files in the
835 archive."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000836 return self.filelist
837
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000838 def printdir(self, file=None):
Fred Drake484d7352000-10-02 21:14:52 +0000839 """Print a table of contents for the zip file."""
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000840 print("%-46s %19s %12s" % ("File Name", "Modified ", "Size"),
841 file=file)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000842 for zinfo in self.filelist:
Guido van Rossum7736b5b2008-01-15 21:44:53 +0000843 date = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time[:6]
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000844 print("%-46s %s %12d" % (zinfo.filename, date, zinfo.file_size),
845 file=file)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000846
847 def testzip(self):
Fred Drake484d7352000-10-02 21:14:52 +0000848 """Read all the files and check the CRC."""
Benjamin Peterson4cd6a952008-08-17 20:23:46 +0000849 chunk_size = 2 ** 20
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000850 for zinfo in self.filelist:
851 try:
Benjamin Peterson4cd6a952008-08-17 20:23:46 +0000852 # Read by chunks, to avoid an OverflowError or a
853 # MemoryError with very large embedded files.
854 f = self.open(zinfo.filename, "r")
855 while f.read(chunk_size): # Check CRC-32
856 pass
Raymond Hettingerc0fac962003-06-27 22:25:03 +0000857 except BadZipfile:
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000858 return zinfo.filename
859
860 def getinfo(self, name):
Fred Drake484d7352000-10-02 21:14:52 +0000861 """Return the instance of ZipInfo given 'name'."""
Guido van Rossumb5a755e2007-07-18 18:15:48 +0000862 info = self.NameToInfo.get(name)
863 if info is None:
864 raise KeyError(
865 'There is no item named %r in the archive' % name)
866
867 return info
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000868
Thomas Wouterscf297e42007-02-23 15:07:44 +0000869 def setpassword(self, pwd):
870 """Set default password for encrypted files."""
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000871 assert isinstance(pwd, bytes)
Thomas Wouterscf297e42007-02-23 15:07:44 +0000872 self.pwd = pwd
873
874 def read(self, name, pwd=None):
Fred Drake484d7352000-10-02 21:14:52 +0000875 """Return file bytes (as a string) for name."""
Guido van Rossumd8faa362007-04-27 19:54:29 +0000876 return self.open(name, "r", pwd).read()
877
878 def open(self, name, mode="r", pwd=None):
879 """Return file-like object for 'name'."""
880 if mode not in ("r", "U", "rU"):
Collin Winterce36ad82007-08-30 01:19:48 +0000881 raise RuntimeError('open() requires mode "r", "U", or "rU"')
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000882 if not self.fp:
Collin Winterce36ad82007-08-30 01:19:48 +0000883 raise RuntimeError(
884 "Attempt to read ZIP archive that was already closed")
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000885
Guido van Rossumd8faa362007-04-27 19:54:29 +0000886 # Only open a new file for instances where we were not
887 # given a file object in the constructor
888 if self._filePassed:
889 zef_file = self.fp
890 else:
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000891 zef_file = io.open(self.filename, 'rb')
Guido van Rossumd8faa362007-04-27 19:54:29 +0000892
Georg Brandlb533e262008-05-25 18:19:30 +0000893 # Make sure we have an info object
894 if isinstance(name, ZipInfo):
895 # 'name' is already an info object
896 zinfo = name
897 else:
898 # Get info object for name
899 zinfo = self.getinfo(name)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000900
901 zef_file.seek(zinfo.header_offset, 0)
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000902
903 # Skip the file header:
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000904 fheader = zef_file.read(sizeFileHeader)
Georg Brandl2ee470f2008-07-16 12:55:28 +0000905 if fheader[0:4] != stringFileHeader:
Collin Winterce36ad82007-08-30 01:19:48 +0000906 raise BadZipfile("Bad magic number for file header")
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000907
908 fheader = struct.unpack(structFileHeader, fheader)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000909 fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000910 if fheader[_FH_EXTRA_FIELD_LENGTH]:
Guido van Rossumd8faa362007-04-27 19:54:29 +0000911 zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000912
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000913 if fname != zinfo.orig_filename.encode("utf-8"):
Collin Winterce36ad82007-08-30 01:19:48 +0000914 raise BadZipfile(
915 'File name in directory %r and header %r differ.'
916 % (zinfo.orig_filename, fname))
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000917
Guido van Rossumd8faa362007-04-27 19:54:29 +0000918 # check for encrypted flag & handle password
919 is_encrypted = zinfo.flag_bits & 0x1
920 zd = None
Thomas Wouterscf297e42007-02-23 15:07:44 +0000921 if is_encrypted:
Guido van Rossumd8faa362007-04-27 19:54:29 +0000922 if not pwd:
923 pwd = self.pwd
924 if not pwd:
Collin Winterce36ad82007-08-30 01:19:48 +0000925 raise RuntimeError("File %s is encrypted, "
926 "password required for extraction" % name)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000927
Thomas Wouterscf297e42007-02-23 15:07:44 +0000928 zd = _ZipDecrypter(pwd)
929 # The first 12 bytes in the cypher stream is an encryption header
930 # used to strengthen the algorithm. The first 11 bytes are
931 # completely random, while the 12th contains the MSB of the CRC,
Christian Heimesfdab48e2008-01-20 09:06:41 +0000932 # or the MSB of the file time depending on the header type
Thomas Wouterscf297e42007-02-23 15:07:44 +0000933 # and is used to check the correctness of the password.
Guido van Rossumd8faa362007-04-27 19:54:29 +0000934 bytes = zef_file.read(12)
Guido van Rossumc1f779c2007-07-03 08:25:58 +0000935 h = list(map(zd, bytes[0:12]))
Christian Heimesfdab48e2008-01-20 09:06:41 +0000936 if zinfo.flag_bits & 0x8:
937 # compare against the file type from extended local headers
938 check_byte = (zinfo._raw_time >> 8) & 0xff
939 else:
940 # compare against the CRC otherwise
941 check_byte = (zinfo.CRC >> 24) & 0xff
942 if h[11] != check_byte:
943 raise RuntimeError("Bad password for file", name)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000944
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000945 return ZipExtFile(zef_file, mode, zinfo, zd)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000946
Christian Heimes790c8232008-01-07 21:14:23 +0000947 def extract(self, member, path=None, pwd=None):
948 """Extract a member from the archive to the current working directory,
949 using its full name. Its file information is extracted as accurately
950 as possible. `member' may be a filename or a ZipInfo object. You can
951 specify a different directory using `path'.
952 """
953 if not isinstance(member, ZipInfo):
954 member = self.getinfo(member)
955
956 if path is None:
957 path = os.getcwd()
958
959 return self._extract_member(member, path, pwd)
960
961 def extractall(self, path=None, members=None, pwd=None):
962 """Extract all members from the archive to the current working
963 directory. `path' specifies a different directory to extract to.
964 `members' is optional and must be a subset of the list returned
965 by namelist().
966 """
967 if members is None:
968 members = self.namelist()
969
970 for zipinfo in members:
971 self.extract(zipinfo, path, pwd)
972
973 def _extract_member(self, member, targetpath, pwd):
974 """Extract the ZipInfo object 'member' to a physical
975 file on the path targetpath.
976 """
977 # build the destination pathname, replacing
978 # forward slashes to platform specific separators.
Antoine Pitrou3c33e082009-05-04 21:21:36 +0000979 # Strip trailing path separator, unless it represents the root.
980 if (targetpath[-1:] in (os.path.sep, os.path.altsep)
981 and len(os.path.splitdrive(targetpath)[1]) > 1):
Christian Heimes790c8232008-01-07 21:14:23 +0000982 targetpath = targetpath[:-1]
983
984 # don't include leading "/" from file name if present
Martin v. Löwis59e47792009-01-24 14:10:07 +0000985 if member.filename[0] == '/':
Christian Heimes790c8232008-01-07 21:14:23 +0000986 targetpath = os.path.join(targetpath, member.filename[1:])
987 else:
988 targetpath = os.path.join(targetpath, member.filename)
989
990 targetpath = os.path.normpath(targetpath)
991
992 # Create all upper directories if necessary.
993 upperdirs = os.path.dirname(targetpath)
994 if upperdirs and not os.path.exists(upperdirs):
995 os.makedirs(upperdirs)
996
Martin v. Löwis59e47792009-01-24 14:10:07 +0000997 if member.filename[-1] == '/':
Martin v. Löwis70ccd162009-05-24 19:47:22 +0000998 if not os.path.isdir(targetpath):
999 os.mkdir(targetpath)
Martin v. Löwis59e47792009-01-24 14:10:07 +00001000 return targetpath
1001
Georg Brandlb533e262008-05-25 18:19:30 +00001002 source = self.open(member, pwd=pwd)
Christian Heimes790c8232008-01-07 21:14:23 +00001003 target = open(targetpath, "wb")
1004 shutil.copyfileobj(source, target)
1005 source.close()
1006 target.close()
1007
1008 return targetpath
1009
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001010 def _writecheck(self, zinfo):
Fred Drake484d7352000-10-02 21:14:52 +00001011 """Check for errors before writing a file to the archive."""
Raymond Hettinger54f02222002-06-01 14:18:47 +00001012 if zinfo.filename in self.NameToInfo:
Tim Peterse1190062001-01-15 03:34:38 +00001013 if self.debug: # Warning for duplicate names
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001014 print("Duplicate name:", zinfo.filename)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001015 if self.mode not in ("w", "a"):
Collin Winterce36ad82007-08-30 01:19:48 +00001016 raise RuntimeError('write() requires mode "w" or "a"')
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001017 if not self.fp:
Collin Winterce36ad82007-08-30 01:19:48 +00001018 raise RuntimeError(
1019 "Attempt to write ZIP archive that was already closed")
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001020 if zinfo.compress_type == ZIP_DEFLATED and not zlib:
Collin Winterce36ad82007-08-30 01:19:48 +00001021 raise RuntimeError(
1022 "Compression requires the (missing) zlib module")
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001023 if zinfo.compress_type not in (ZIP_STORED, ZIP_DEFLATED):
Collin Winterce36ad82007-08-30 01:19:48 +00001024 raise RuntimeError("That compression method is not supported")
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001025 if zinfo.file_size > ZIP64_LIMIT:
1026 if not self._allowZip64:
1027 raise LargeZipFile("Filesize would require ZIP64 extensions")
1028 if zinfo.header_offset > ZIP64_LIMIT:
1029 if not self._allowZip64:
Collin Winterce36ad82007-08-30 01:19:48 +00001030 raise LargeZipFile(
1031 "Zipfile size would require ZIP64 extensions")
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001032
1033 def write(self, filename, arcname=None, compress_type=None):
Fred Drake484d7352000-10-02 21:14:52 +00001034 """Put the bytes from filename into the archive under the name
1035 arcname."""
Guido van Rossumb5a755e2007-07-18 18:15:48 +00001036 if not self.fp:
1037 raise RuntimeError(
1038 "Attempt to write to ZIP archive that was already closed")
1039
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001040 st = os.stat(filename)
Martin v. Löwis59e47792009-01-24 14:10:07 +00001041 isdir = stat.S_ISDIR(st.st_mode)
Raymond Hettinger32200ae2002-06-01 19:51:15 +00001042 mtime = time.localtime(st.st_mtime)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001043 date_time = mtime[0:6]
1044 # Create ZipInfo instance to store file information
1045 if arcname is None:
Georg Brandl8f7c54e2006-02-20 08:40:38 +00001046 arcname = filename
1047 arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
1048 while arcname[0] in (os.sep, os.altsep):
1049 arcname = arcname[1:]
Martin v. Löwis59e47792009-01-24 14:10:07 +00001050 if isdir:
1051 arcname += '/'
Georg Brandl8f7c54e2006-02-20 08:40:38 +00001052 zinfo = ZipInfo(arcname, date_time)
Guido van Rossume2a383d2007-01-15 16:59:06 +00001053 zinfo.external_attr = (st[0] & 0xFFFF) << 16 # Unix attributes
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001054 if compress_type is None:
Tim Peterse1190062001-01-15 03:34:38 +00001055 zinfo.compress_type = self.compression
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001056 else:
Tim Peterse1190062001-01-15 03:34:38 +00001057 zinfo.compress_type = compress_type
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001058
1059 zinfo.file_size = st.st_size
Finn Bock03a3bb82001-09-05 18:40:33 +00001060 zinfo.flag_bits = 0x00
Tim Peterse1190062001-01-15 03:34:38 +00001061 zinfo.header_offset = self.fp.tell() # Start of header bytes
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001062
1063 self._writecheck(zinfo)
1064 self._didModify = True
Martin v. Löwis59e47792009-01-24 14:10:07 +00001065
1066 if isdir:
1067 zinfo.file_size = 0
1068 zinfo.compress_size = 0
1069 zinfo.CRC = 0
1070 self.filelist.append(zinfo)
1071 self.NameToInfo[zinfo.filename] = zinfo
1072 self.fp.write(zinfo.FileHeader())
1073 return
1074
Benjamin Petersonfa0d7032009-06-01 22:42:33 +00001075 with open(filename, "rb") as fp:
1076 # Must overwrite CRC and sizes with correct data later
1077 zinfo.CRC = CRC = 0
1078 zinfo.compress_size = compress_size = 0
1079 zinfo.file_size = file_size = 0
1080 self.fp.write(zinfo.FileHeader())
1081 if zinfo.compress_type == ZIP_DEFLATED:
1082 cmpr = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
1083 zlib.DEFLATED, -15)
1084 else:
1085 cmpr = None
1086 while 1:
1087 buf = fp.read(1024 * 8)
1088 if not buf:
1089 break
1090 file_size = file_size + len(buf)
1091 CRC = crc32(buf, CRC) & 0xffffffff
1092 if cmpr:
1093 buf = cmpr.compress(buf)
1094 compress_size = compress_size + len(buf)
1095 self.fp.write(buf)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001096 if cmpr:
1097 buf = cmpr.flush()
1098 compress_size = compress_size + len(buf)
1099 self.fp.write(buf)
1100 zinfo.compress_size = compress_size
1101 else:
1102 zinfo.compress_size = file_size
1103 zinfo.CRC = CRC
1104 zinfo.file_size = file_size
Finn Bock03a3bb82001-09-05 18:40:33 +00001105 # Seek backwards and write CRC and file sizes
Tim Petersb64bec32001-09-18 02:26:39 +00001106 position = self.fp.tell() # Preserve current position in file
Finn Bock03a3bb82001-09-05 18:40:33 +00001107 self.fp.seek(zinfo.header_offset + 14, 0)
Christian Heimesd5e2b6f2008-03-19 21:50:51 +00001108 self.fp.write(struct.pack("<LLL", zinfo.CRC, zinfo.compress_size,
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001109 zinfo.file_size))
Finn Bock03a3bb82001-09-05 18:40:33 +00001110 self.fp.seek(position, 0)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001111 self.filelist.append(zinfo)
1112 self.NameToInfo[zinfo.filename] = zinfo
1113
Ronald Oussorenee5c8852010-02-07 20:24:02 +00001114 def writestr(self, zinfo_or_arcname, data, compress_type=None):
Guido van Rossum85825dc2007-08-27 17:03:28 +00001115 """Write a file into the archive. The contents is 'data', which
1116 may be either a 'str' or a 'bytes' instance; if it is a 'str',
1117 it is encoded as UTF-8 first.
1118 'zinfo_or_arcname' is either a ZipInfo instance or
Just van Rossumb083cb32002-12-12 12:23:32 +00001119 the name of the file in the archive."""
Guido van Rossum85825dc2007-08-27 17:03:28 +00001120 if isinstance(data, str):
1121 data = data.encode("utf-8")
Just van Rossumb083cb32002-12-12 12:23:32 +00001122 if not isinstance(zinfo_or_arcname, ZipInfo):
1123 zinfo = ZipInfo(filename=zinfo_or_arcname,
Guido van Rossum7736b5b2008-01-15 21:44:53 +00001124 date_time=time.localtime(time.time())[:6])
Just van Rossumb083cb32002-12-12 12:23:32 +00001125 zinfo.compress_type = self.compression
Antoine Pitrou6e1df8d2008-07-25 19:58:18 +00001126 zinfo.external_attr = 0o600 << 16
Just van Rossumb083cb32002-12-12 12:23:32 +00001127 else:
1128 zinfo = zinfo_or_arcname
Guido van Rossumb5a755e2007-07-18 18:15:48 +00001129
1130 if not self.fp:
1131 raise RuntimeError(
1132 "Attempt to write to ZIP archive that was already closed")
1133
Guido van Rossum85825dc2007-08-27 17:03:28 +00001134 zinfo.file_size = len(data) # Uncompressed size
1135 zinfo.header_offset = self.fp.tell() # Start of header data
Ronald Oussorenee5c8852010-02-07 20:24:02 +00001136 if compress_type is not None:
1137 zinfo.compress_type = compress_type
1138
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001139 self._writecheck(zinfo)
1140 self._didModify = True
Christian Heimesd5e2b6f2008-03-19 21:50:51 +00001141 zinfo.CRC = crc32(data) & 0xffffffff # CRC-32 checksum
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001142 if zinfo.compress_type == ZIP_DEFLATED:
1143 co = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
1144 zlib.DEFLATED, -15)
Guido van Rossum85825dc2007-08-27 17:03:28 +00001145 data = co.compress(data) + co.flush()
1146 zinfo.compress_size = len(data) # Compressed size
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001147 else:
1148 zinfo.compress_size = zinfo.file_size
Guido van Rossum85825dc2007-08-27 17:03:28 +00001149 zinfo.header_offset = self.fp.tell() # Start of header data
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001150 self.fp.write(zinfo.FileHeader())
Guido van Rossum85825dc2007-08-27 17:03:28 +00001151 self.fp.write(data)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001152 self.fp.flush()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001153 if zinfo.flag_bits & 0x08:
Tim Peterse1190062001-01-15 03:34:38 +00001154 # Write CRC and file sizes after the file data
Gregory P. Smithe88749b2009-06-26 08:05:13 +00001155 self.fp.write(struct.pack("<LLL", zinfo.CRC, zinfo.compress_size,
Tim Peterse1190062001-01-15 03:34:38 +00001156 zinfo.file_size))
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001157 self.filelist.append(zinfo)
1158 self.NameToInfo[zinfo.filename] = zinfo
1159
1160 def __del__(self):
Fred Drake484d7352000-10-02 21:14:52 +00001161 """Call the "close()" method in case the user forgot."""
Tim Petersd15f8bb2001-11-28 23:16:40 +00001162 self.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001163
1164 def close(self):
Fred Drake484d7352000-10-02 21:14:52 +00001165 """Close the file, and for mode "w" and "a" write the ending
1166 records."""
Tim Petersd15f8bb2001-11-28 23:16:40 +00001167 if self.fp is None:
1168 return
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001169
1170 if self.mode in ("w", "a") and self._didModify: # write ending records
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001171 count = 0
1172 pos1 = self.fp.tell()
Tim Peterse1190062001-01-15 03:34:38 +00001173 for zinfo in self.filelist: # write central directory
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001174 count = count + 1
1175 dt = zinfo.date_time
1176 dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
Tim Peters3caca232001-12-06 06:23:26 +00001177 dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001178 extra = []
1179 if zinfo.file_size > ZIP64_LIMIT \
1180 or zinfo.compress_size > ZIP64_LIMIT:
1181 extra.append(zinfo.file_size)
1182 extra.append(zinfo.compress_size)
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001183 file_size = 0xffffffff
1184 compress_size = 0xffffffff
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001185 else:
1186 file_size = zinfo.file_size
1187 compress_size = zinfo.compress_size
1188
1189 if zinfo.header_offset > ZIP64_LIMIT:
1190 extra.append(zinfo.header_offset)
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001191 header_offset = 0xffffffff
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001192 else:
1193 header_offset = zinfo.header_offset
1194
1195 extra_data = zinfo.extra
1196 if extra:
1197 # Append a ZIP64 field to the extra's
1198 extra_data = struct.pack(
Christian Heimesd5e2b6f2008-03-19 21:50:51 +00001199 '<HH' + 'Q'*len(extra),
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001200 1, 8*len(extra), *extra) + extra_data
1201
1202 extract_version = max(45, zinfo.extract_version)
1203 create_version = max(45, zinfo.create_version)
1204 else:
1205 extract_version = zinfo.extract_version
1206 create_version = zinfo.create_version
1207
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001208 try:
1209 filename, flag_bits = zinfo._encodeFilenameFlags()
1210 centdir = struct.pack(structCentralDir,
Ezio Melottiaf30d2e2009-09-25 21:35:24 +00001211 stringCentralDir, create_version,
1212 zinfo.create_system, extract_version, zinfo.reserved,
1213 flag_bits, zinfo.compress_type, dostime, dosdate,
1214 zinfo.CRC, compress_size, file_size,
1215 len(filename), len(extra_data), len(zinfo.comment),
1216 0, zinfo.internal_attr, zinfo.external_attr,
1217 header_offset)
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001218 except DeprecationWarning:
Ezio Melottiaf30d2e2009-09-25 21:35:24 +00001219 print((structCentralDir, stringCentralDir, create_version,
1220 zinfo.create_system, extract_version, zinfo.reserved,
1221 zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
1222 zinfo.CRC, compress_size, file_size,
1223 len(zinfo.filename), len(extra_data), len(zinfo.comment),
1224 0, zinfo.internal_attr, zinfo.external_attr,
1225 header_offset), file=sys.stderr)
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001226 raise
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001227 self.fp.write(centdir)
Martin v. Löwis8570f6a2008-05-05 17:44:38 +00001228 self.fp.write(filename)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001229 self.fp.write(extra_data)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001230 self.fp.write(zinfo.comment)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001231
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001232 pos2 = self.fp.tell()
1233 # Write end-of-zip-archive record
Amaury Forgeot d'Arc0c3f8a42009-01-17 16:42:26 +00001234 centDirCount = count
1235 centDirSize = pos2 - pos1
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001236 centDirOffset = pos1
Amaury Forgeot d'Arc0c3f8a42009-01-17 16:42:26 +00001237 if (centDirCount >= ZIP_FILECOUNT_LIMIT or
1238 centDirOffset > ZIP64_LIMIT or
1239 centDirSize > ZIP64_LIMIT):
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001240 # Need to write the ZIP64 end-of-archive records
1241 zip64endrec = struct.pack(
Georg Brandl2ee470f2008-07-16 12:55:28 +00001242 structEndArchive64, stringEndArchive64,
Amaury Forgeot d'Arc0c3f8a42009-01-17 16:42:26 +00001243 44, 45, 45, 0, 0, centDirCount, centDirCount,
1244 centDirSize, centDirOffset)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001245 self.fp.write(zip64endrec)
1246
1247 zip64locrec = struct.pack(
Georg Brandl2ee470f2008-07-16 12:55:28 +00001248 structEndArchive64Locator,
1249 stringEndArchive64Locator, 0, pos2, 1)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001250 self.fp.write(zip64locrec)
Amaury Forgeot d'Arc0c3f8a42009-01-17 16:42:26 +00001251 centDirCount = min(centDirCount, 0xFFFF)
1252 centDirSize = min(centDirSize, 0xFFFFFFFF)
1253 centDirOffset = min(centDirOffset, 0xFFFFFFFF)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001254
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001255 # check for valid comment length
1256 if len(self.comment) >= ZIP_MAX_COMMENT:
1257 if self.debug > 0:
1258 msg = 'Archive comment is too long; truncating to %d bytes' \
1259 % ZIP_MAX_COMMENT
1260 self.comment = self.comment[:ZIP_MAX_COMMENT]
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001261
Georg Brandl2ee470f2008-07-16 12:55:28 +00001262 endrec = struct.pack(structEndArchive, stringEndArchive,
Amaury Forgeot d'Arc0c3f8a42009-01-17 16:42:26 +00001263 0, 0, centDirCount, centDirCount,
1264 centDirSize, centDirOffset, len(self.comment))
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001265 self.fp.write(endrec)
1266 self.fp.write(self.comment)
Guido van Rossumf85af612001-04-14 16:45:14 +00001267 self.fp.flush()
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001268
Fred Drake3d9091e2001-03-26 15:49:24 +00001269 if not self._filePassed:
1270 self.fp.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001271 self.fp = None
1272
1273
1274class PyZipFile(ZipFile):
Fred Drake484d7352000-10-02 21:14:52 +00001275 """Class to create ZIP archives with Python library files and packages."""
1276
Georg Brandlfe991052009-09-16 15:54:04 +00001277 def writepy(self, pathname, basename=""):
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001278 """Add all files from "pathname" to the ZIP archive.
1279
Fred Drake484d7352000-10-02 21:14:52 +00001280 If pathname is a package directory, search the directory and
1281 all package subdirectories recursively for all *.py and enter
1282 the modules into the archive. If pathname is a plain
1283 directory, listdir *.py and enter all modules. Else, pathname
1284 must be a Python *.py file and the module will be put into the
1285 archive. Added modules are always module.pyo or module.pyc.
1286 This method will compile the module.py into module.pyc if
1287 necessary.
1288 """
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001289 dir, name = os.path.split(pathname)
1290 if os.path.isdir(pathname):
1291 initname = os.path.join(pathname, "__init__.py")
1292 if os.path.isfile(initname):
1293 # This is a package directory, add it
1294 if basename:
1295 basename = "%s/%s" % (basename, name)
1296 else:
1297 basename = name
1298 if self.debug:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001299 print("Adding package in", pathname, "as", basename)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001300 fname, arcname = self._get_codename(initname[0:-3], basename)
1301 if self.debug:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001302 print("Adding", arcname)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001303 self.write(fname, arcname)
1304 dirlist = os.listdir(pathname)
1305 dirlist.remove("__init__.py")
1306 # Add all *.py files and package subdirectories
1307 for filename in dirlist:
1308 path = os.path.join(pathname, filename)
1309 root, ext = os.path.splitext(filename)
1310 if os.path.isdir(path):
1311 if os.path.isfile(os.path.join(path, "__init__.py")):
1312 # This is a package directory, add it
1313 self.writepy(path, basename) # Recursive call
1314 elif ext == ".py":
1315 fname, arcname = self._get_codename(path[0:-3],
1316 basename)
1317 if self.debug:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001318 print("Adding", arcname)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001319 self.write(fname, arcname)
1320 else:
1321 # This is NOT a package directory, add its files at top level
1322 if self.debug:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001323 print("Adding files from directory", pathname)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001324 for filename in os.listdir(pathname):
1325 path = os.path.join(pathname, filename)
1326 root, ext = os.path.splitext(filename)
1327 if ext == ".py":
1328 fname, arcname = self._get_codename(path[0:-3],
1329 basename)
1330 if self.debug:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001331 print("Adding", arcname)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001332 self.write(fname, arcname)
1333 else:
1334 if pathname[-3:] != ".py":
Collin Winterce36ad82007-08-30 01:19:48 +00001335 raise RuntimeError(
1336 'Files added with writepy() must end with ".py"')
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001337 fname, arcname = self._get_codename(pathname[0:-3], basename)
1338 if self.debug:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001339 print("Adding file", arcname)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001340 self.write(fname, arcname)
1341
1342 def _get_codename(self, pathname, basename):
1343 """Return (filename, archivename) for the path.
1344
Fred Drake484d7352000-10-02 21:14:52 +00001345 Given a module name path, return the correct file path and
1346 archive name, compiling if necessary. For example, given
1347 /python/lib/string, return (/python/lib/string.pyc, string).
1348 """
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001349 file_py = pathname + ".py"
1350 file_pyc = pathname + ".pyc"
1351 file_pyo = pathname + ".pyo"
Barry Warsaw28a691b2010-04-17 00:19:56 +00001352 pycache_pyc = imp.cache_from_source(file_py, True)
1353 pycache_pyo = imp.cache_from_source(file_py, False)
1354 if (os.path.isfile(file_pyo) and
1355 os.stat(file_pyo).st_mtime >= os.stat(file_py).st_mtime):
1356 # Use .pyo file.
1357 arcname = fname = file_pyo
1358 elif (os.path.isfile(file_pyc) and
1359 os.stat(file_pyc).st_mtime >= os.stat(file_py).st_mtime):
1360 # Use .pyc file.
1361 arcname = fname = file_pyc
1362 elif (os.path.isfile(pycache_pyc) and
1363 os.stat(pycache_pyc).st_mtime >= os.stat(file_py).st_mtime):
1364 # Use the __pycache__/*.pyc file, but write it to the legacy pyc
1365 # file name in the archive.
1366 fname = pycache_pyc
1367 arcname = file_pyc
1368 elif (os.path.isfile(pycache_pyo) and
1369 os.stat(pycache_pyo).st_mtime >= os.stat(file_py).st_mtime):
1370 # Use the __pycache__/*.pyo file, but write it to the legacy pyo
1371 # file name in the archive.
1372 fname = pycache_pyo
1373 arcname = file_pyo
1374 else:
1375 # Compile py into PEP 3147 pyc file.
Fred Drake484d7352000-10-02 21:14:52 +00001376 import py_compile
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001377 if self.debug:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001378 print("Compiling", file_py)
Martin v. Löwis0c6774d2003-01-15 11:51:06 +00001379 try:
Barry Warsaw28a691b2010-04-17 00:19:56 +00001380 py_compile.compile(file_py, doraise=True)
1381 except py_compile.PyCompileError as error:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001382 print(err.msg)
Barry Warsaw28a691b2010-04-17 00:19:56 +00001383 fname = file_py
1384 else:
1385 fname = (pycache_pyc if __debug__ else pycache_pyo)
1386 arcname = (file_pyc if __debug__ else file_pyo)
1387 archivename = os.path.split(arcname)[1]
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001388 if basename:
1389 archivename = "%s/%s" % (basename, archivename)
1390 return (fname, archivename)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001391
1392
1393def main(args = None):
1394 import textwrap
1395 USAGE=textwrap.dedent("""\
1396 Usage:
1397 zipfile.py -l zipfile.zip # Show listing of a zipfile
1398 zipfile.py -t zipfile.zip # Test if a zipfile is valid
1399 zipfile.py -e zipfile.zip target # Extract zipfile into target dir
1400 zipfile.py -c zipfile.zip src ... # Create zipfile from sources
1401 """)
1402 if args is None:
1403 args = sys.argv[1:]
1404
1405 if not args or args[0] not in ('-l', '-c', '-e', '-t'):
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001406 print(USAGE)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001407 sys.exit(1)
1408
1409 if args[0] == '-l':
1410 if len(args) != 2:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001411 print(USAGE)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001412 sys.exit(1)
1413 zf = ZipFile(args[1], 'r')
1414 zf.printdir()
1415 zf.close()
1416
1417 elif args[0] == '-t':
1418 if len(args) != 2:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001419 print(USAGE)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001420 sys.exit(1)
1421 zf = ZipFile(args[1], 'r')
Antoine Pitrou7c8bcb62010-08-12 15:11:50 +00001422 badfile = zf.testzip()
1423 if badfile:
1424 print("The following enclosed file is corrupted: {!r}".format(badfile))
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001425 print("Done testing")
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001426
1427 elif args[0] == '-e':
1428 if len(args) != 3:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001429 print(USAGE)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001430 sys.exit(1)
1431
1432 zf = ZipFile(args[1], 'r')
1433 out = args[2]
1434 for path in zf.namelist():
1435 if path.startswith('./'):
1436 tgt = os.path.join(out, path[2:])
1437 else:
1438 tgt = os.path.join(out, path)
1439
1440 tgtdir = os.path.dirname(tgt)
1441 if not os.path.exists(tgtdir):
1442 os.makedirs(tgtdir)
Benjamin Petersonfa0d7032009-06-01 22:42:33 +00001443 with open(tgt, 'wb') as fp:
1444 fp.write(zf.read(path))
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001445 zf.close()
1446
1447 elif args[0] == '-c':
1448 if len(args) < 3:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001449 print(USAGE)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001450 sys.exit(1)
1451
1452 def addToZip(zf, path, zippath):
1453 if os.path.isfile(path):
1454 zf.write(path, zippath, ZIP_DEFLATED)
1455 elif os.path.isdir(path):
1456 for nm in os.listdir(path):
1457 addToZip(zf,
1458 os.path.join(path, nm), os.path.join(zippath, nm))
1459 # else: ignore
1460
1461 zf = ZipFile(args[1], 'w', allowZip64=True)
1462 for src in args[2:]:
1463 addToZip(zf, src, os.path.basename(src))
1464
1465 zf.close()
1466
1467if __name__ == "__main__":
1468 main()