blob: 149f2a60ee14f0609972c936ebdd371277c0d8e6 [file] [log] [blame]
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001"""
2Read and write ZIP files.
Guido van Rossumd6ca5462007-05-22 01:29:33 +00003
4XXX references to utf-8 need further investigation.
Thomas Wouters0e3f5912006-08-11 14:57:12 +00005"""
Christian Heimes790c8232008-01-07 21:14:23 +00006import struct, os, time, sys, shutil
Guido van Rossum68937b42007-05-18 00:51:22 +00007import binascii, io
Guido van Rossum32abe6f2000-03-31 17:30:02 +00008
9try:
Tim Peterse1190062001-01-15 03:34:38 +000010 import zlib # We may need its compression method
Christian Heimesd5e2b6f2008-03-19 21:50:51 +000011 crc32 = zlib.crc32
Guido van Rossum9c673f32001-04-10 15:37:12 +000012except ImportError:
Guido van Rossum32abe6f2000-03-31 17:30:02 +000013 zlib = None
Christian Heimesd5e2b6f2008-03-19 21:50:51 +000014 crc32 = binascii.crc32
Guido van Rossum32abe6f2000-03-31 17:30:02 +000015
Skip Montanaro40fc1602001-03-01 04:27:19 +000016__all__ = ["BadZipfile", "error", "ZIP_STORED", "ZIP_DEFLATED", "is_zipfile",
Thomas Wouters0e3f5912006-08-11 14:57:12 +000017 "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile" ]
Skip Montanaro40fc1602001-03-01 04:27:19 +000018
class BadZipfile(Exception):
    """The exception raised by this module (also available as ``error``)."""


class LargeZipFile(Exception):
    """
    Raised when writing a zipfile that requires ZIP64 extensions
    while those extensions are disabled.
    """


# Historical alias: the exception raised by this module.
error = BadZipfile
Guido van Rossum32abe6f2000-03-31 17:30:02 +000030
# Thresholds used when deciding whether the ZIP64 extensions are needed.
ZIP64_LIMIT = 0x7FFFFFFF            # (1 << 31) - 1
ZIP_FILECOUNT_LIMIT = 0x10000       # 1 << 16
ZIP_MAX_COMMENT = 0xFFFF            # (1 << 16) - 1

# Constants for ZIP file compression methods.
# Other ZIP compression methods are not supported.
ZIP_STORED, ZIP_DEFLATED = 0, 8
39
Martin v. Löwisb09b8442008-07-03 14:13:42 +000040# Below are some formats and associated data for reading/writing headers using
41# the struct module. The names and structures of headers/records are those used
42# in the PKWARE description of the ZIP file format:
43# http://www.pkware.com/documents/casestudies/APPNOTE.TXT
44# (URL valid as of January 2008)
Thomas Wouters0e3f5912006-08-11 14:57:12 +000045
# The "end of central directory" record (section V.I in the format
# document): struct layout, magic number and packed size.
structEndArchive = b"<4s4H2LH"
stringEndArchive = b"PK\005\006"
sizeEndCentDir = struct.calcsize(structEndArchive)

# Positions of the fields inside the unpacked record.
(_ECD_SIGNATURE,
 _ECD_DISK_NUMBER,
 _ECD_DISK_START,
 _ECD_ENTRIES_THIS_DISK,
 _ECD_ENTRIES_TOTAL,
 _ECD_SIZE,
 _ECD_OFFSET,
 _ECD_COMMENT_SIZE) = range(8)
# These last two indices are not part of the structure as defined in the
# spec, but they are used internally by this module as a convenience
_ECD_COMMENT, _ECD_LOCATION = 8, 9
63_ECD_LOCATION = 9
64
# The "central directory" entry (section V.F in the format document):
# struct layout, magic number and packed size.
structCentralDir = "<4s4B4HL2L5H2L"
stringCentralDir = b"PK\001\002"
sizeCentralDir = struct.calcsize(structCentralDir)

# Indexes of the fields inside an unpacked central directory entry.
(_CD_SIGNATURE,
 _CD_CREATE_VERSION,
 _CD_CREATE_SYSTEM,
 _CD_EXTRACT_VERSION,
 _CD_EXTRACT_SYSTEM,
 _CD_FLAG_BITS,
 _CD_COMPRESS_TYPE,
 _CD_TIME,
 _CD_DATE,
 _CD_CRC,
 _CD_COMPRESSED_SIZE,
 _CD_UNCOMPRESSED_SIZE,
 _CD_FILENAME_LENGTH,
 _CD_EXTRA_FIELD_LENGTH,
 _CD_COMMENT_LENGTH,
 _CD_DISK_NUMBER_START,
 _CD_INTERNAL_FILE_ATTRIBUTES,
 _CD_EXTERNAL_FILE_ATTRIBUTES,
 _CD_LOCAL_HEADER_OFFSET) = range(19)
91
# The "local file header" record (section V.A in the format document):
# struct layout, magic number and packed size.
structFileHeader = "<4s2B4HL2L2H"
stringFileHeader = b"PK\003\004"
sizeFileHeader = struct.calcsize(structFileHeader)

# Indexes of the fields inside an unpacked local file header.
(_FH_SIGNATURE,
 _FH_EXTRACT_VERSION,
 _FH_EXTRACT_SYSTEM,
 _FH_GENERAL_PURPOSE_FLAG_BITS,
 _FH_COMPRESSION_METHOD,
 _FH_LAST_MOD_TIME,
 _FH_LAST_MOD_DATE,
 _FH_CRC,
 _FH_COMPRESSED_SIZE,
 _FH_UNCOMPRESSED_SIZE,
 _FH_FILENAME_LENGTH,
 _FH_EXTRA_FIELD_LENGTH) = range(12)
110
# The "Zip64 end of central directory locator" record:
# struct layout, magic number and packed size.
structEndArchive64Locator = "<4sLQL"
stringEndArchive64Locator = b"PK\x06\x07"
sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator)
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000115
# The "Zip64 end of central directory" record (section V.G in the format
# document): struct layout, magic number and packed size.
structEndArchive64 = "<4sQ2H2L4Q"
stringEndArchive64 = b"PK\x06\x06"
sizeEndCentDir64 = struct.calcsize(structEndArchive64)

# Indexes of the fields inside the unpacked ZIP64 end record.
(_CD64_SIGNATURE,
 _CD64_DIRECTORY_RECSIZE,
 _CD64_CREATE_VERSION,
 _CD64_EXTRACT_VERSION,
 _CD64_DISK_NUMBER,
 _CD64_DISK_NUMBER_START,
 _CD64_NUMBER_ENTRIES_THIS_DISK,
 _CD64_NUMBER_ENTRIES_TOTAL,
 _CD64_DIRECTORY_SIZE,
 _CD64_OFFSET_START_CENTDIR) = range(10)
132
def _check_zipfile(fp):
    """Return True if an "end of central directory" record is found in fp.

    fp is an open binary file object.  An IOError raised while looking for
    the record is treated as "not a ZIP file" and yields False.
    """
    try:
        endrec = _EndRecData(fp)
    except IOError:
        return False
    # A record was found: the file has the correct magic number.
    return bool(endrec)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000140
def is_zipfile(filename):
    """Quickly see if a file is a ZIP file by checking the magic number.

    The filename argument may be a path, or a file or file-like object
    (anything with a "read" attribute).  Returns False when the file
    cannot be opened or read.
    """
    try:
        if hasattr(filename, "read"):
            # Already a file object: inspect it directly.
            return _check_zipfile(fp=filename)
        with open(filename, "rb") as fp:
            return _check_zipfile(fp)
    except IOError:
        return False
156
def _EndRecData64(fpin, offset, endrec):
    """
    Read the ZIP64 end-of-archive records and use that to update endrec.

    fpin is a seekable binary file object, offset is the position of the
    classic "end of central directory" record relative to the end of the
    file, and endrec is the list built by _EndRecData.  The (possibly
    updated) endrec is returned.  If the ZIP64 locator or record is missing,
    truncated, or does not carry the expected signature, endrec is returned
    unchanged.

    Raises BadZipfile for archives that span multiple disks.
    """
    try:
        fpin.seek(offset - sizeEndCentDir64Locator, 2)
    except IOError:
        # The file is too small to hold a ZIP64 locator in front of the
        # end record, so there is nothing to update.
        return endrec
    data = fpin.read(sizeEndCentDir64Locator)
    if len(data) != sizeEndCentDir64Locator:
        # Short read: unpacking would fail, treat it as "no ZIP64 data".
        return endrec
    sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, data)
    if sig != stringEndArchive64Locator:
        return endrec

    if diskno != 0 or disks != 1:
        raise BadZipfile("zipfiles that span multiple disks are not supported")

    # Assume no 'zip64 extensible data'
    try:
        fpin.seek(offset - sizeEndCentDir64Locator - sizeEndCentDir64, 2)
    except IOError:
        return endrec
    data = fpin.read(sizeEndCentDir64)
    if len(data) != sizeEndCentDir64:
        return endrec
    sig, sz, create_version, read_version, disk_num, disk_dir, \
            dircount, dircount2, dirsize, diroffset = \
            struct.unpack(structEndArchive64, data)
    if sig != stringEndArchive64:
        return endrec

    # Update the original endrec using data from the ZIP64 record
    endrec[_ECD_SIGNATURE] = sig
    endrec[_ECD_DISK_NUMBER] = disk_num
    endrec[_ECD_DISK_START] = disk_dir
    endrec[_ECD_ENTRIES_THIS_DISK] = dircount
    endrec[_ECD_ENTRIES_TOTAL] = dircount2
    endrec[_ECD_SIZE] = dirsize
    endrec[_ECD_OFFSET] = diroffset
    return endrec
188
189
def _EndRecData(fpin):
    """Return data from the "End of Central Directory" record, or None.

    The data is a list of the nine items in the ZIP "End of central dir"
    record followed by a tenth item, the file seek offset of this record.

    None is returned when no valid record can be located, including when
    the file is too short to contain one."""

    # Determine file size
    fpin.seek(0, 2)
    filesize = fpin.tell()

    # Check to see if this is ZIP file with no archive comment (the
    # "end of central directory" structure should be the last item in the
    # file if this is the case).
    try:
        fpin.seek(-sizeEndCentDir, 2)
    except IOError:
        # The file is shorter than the record itself: not a ZIP file.
        return None
    data = fpin.read()
    if (len(data) == sizeEndCentDir and
            data[0:4] == stringEndArchive and
            data[-2:] == b"\000\000"):
        # the signature is correct and there's no comment, unpack structure
        endrec = list(struct.unpack(structEndArchive, data))

        # Append a blank comment and record start offset
        endrec.append(b"")
        endrec.append(filesize - sizeEndCentDir)
        if endrec[_ECD_OFFSET] == 0xffffffff:
            # the value for the "offset of the start of the central directory"
            # indicates that there is a "Zip64 end of central directory"
            # structure present, so go look for it
            return _EndRecData64(fpin, -sizeEndCentDir, endrec)

        return endrec

    # Either this is not a ZIP file, or it is a ZIP file with an archive
    # comment.  Search the end of the file for the "end of central directory"
    # record signature. The comment is the last item in the ZIP file and may be
    # up to 64K long.  It is assumed that the "end of central directory" magic
    # number does not appear in the comment.
    maxCommentStart = max(filesize - (1 << 16) - sizeEndCentDir, 0)
    fpin.seek(maxCommentStart, 0)
    data = fpin.read()
    start = data.rfind(stringEndArchive)
    if start >= 0:
        # found the magic number; attempt to unpack and interpret
        recData = data[start:start+sizeEndCentDir]
        if len(recData) != sizeEndCentDir:
            # The signature sits too close to the end of the file for a
            # complete record to follow it.
            return None
        endrec = list(struct.unpack(structEndArchive, recData))
        comment = data[start+sizeEndCentDir:]
        # check that comment length is correct
        if endrec[_ECD_COMMENT_SIZE] == len(comment):
            # Append the archive comment and start offset
            endrec.append(comment)
            endrec.append(maxCommentStart + start)
            if endrec[_ECD_OFFSET] == 0xffffffff:
                # There is apparently a "Zip64 end of central directory"
                # structure present, so go look for it.  "start" is relative
                # to maxCommentStart, so add it back before converting to an
                # offset from the end of the file.
                return _EndRecData64(fpin, maxCommentStart + start - filesize,
                                     endrec)
            return endrec

    # Unable to find a valid end of central directory structure
    return None
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000248
Fred Drake484d7352000-10-02 21:14:52 +0000249
class ZipInfo (object):
    """Class with attributes describing each file in the ZIP archive."""

    __slots__ = (
            'orig_filename',
            'filename',
            'date_time',
            'compress_type',
            'comment',
            'extra',
            'create_system',
            'create_version',
            'extract_version',
            'reserved',
            'flag_bits',
            'volume',
            'internal_attr',
            'external_attr',
            'header_offset',
            'CRC',
            'compress_size',
            'file_size',
            '_raw_time',
        )

    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
        """Create an entry description for *filename*.

        filename is the member name (str); date_time is a tuple of
        (year, month, day, hour, min, sec).
        """
        self.orig_filename = filename   # Original file name in archive

        # Terminate the file name at the first null byte.  Null bytes in file
        # names are used as tricks by viruses in archives.
        null_byte = filename.find(chr(0))
        if null_byte >= 0:
            filename = filename[0:null_byte]
        # This is used to ensure paths in generated ZIP files always use
        # forward slashes as the directory separator, as required by the
        # ZIP format specification.
        if os.sep != "/" and os.sep in filename:
            filename = filename.replace(os.sep, "/")

        self.filename = filename        # Normalized file name
        self.date_time = date_time      # year, month, day, hour, min, sec
        # Standard values:
        self.compress_type = ZIP_STORED # Type of compression for the file
        self.comment = b""              # Comment for each file
        self.extra = b""                # ZIP extra data
        if sys.platform == 'win32':
            self.create_system = 0          # System which created ZIP archive
        else:
            # Assume everything else is unix-y
            self.create_system = 3          # System which created ZIP archive
        self.create_version = 20        # Version which created ZIP archive
        self.extract_version = 20       # Version needed to extract archive
        self.reserved = 0               # Must be zero
        self.flag_bits = 0              # ZIP flag bits
        self.volume = 0                 # Volume number of file header
        self.internal_attr = 0          # Internal attributes
        self.external_attr = 0          # External file attributes
        # Other attributes are set by class ZipFile:
        # header_offset         Byte offset to the file header
        # CRC                   CRC-32 of the uncompressed file
        # compress_size         Size of the compressed file
        # file_size             Size of the uncompressed file

    def FileHeader(self):
        """Return the per-file header as bytes.

        The result is the packed local file header followed by the encoded
        file name and the extra field.  When a size exceeds ZIP64_LIMIT a
        ZIP64 extra record is appended and extract_version/create_version
        are raised to at least 45.
        """
        dt = self.date_time
        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
        if self.flag_bits & 0x08:
            # Set these to zero because we write them after the file data
            CRC = compress_size = file_size = 0
        else:
            CRC = self.CRC
            compress_size = self.compress_size
            file_size = self.file_size

        extra = self.extra

        if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
            # File is larger than what fits into a 4 byte integer,
            # fall back to the ZIP64 extension
            fmt = '<HHQQ'
            extra = extra + struct.pack(fmt,
                    1, struct.calcsize(fmt)-4, file_size, compress_size)
            file_size = 0xffffffff
            compress_size = 0xffffffff
            # Each version field is only ever raised, based on its own
            # current value.
            self.extract_version = max(45, self.extract_version)
            self.create_version = max(45, self.create_version)

        filename, flag_bits = self._encodeFilenameFlags()
        header = struct.pack(structFileHeader, stringFileHeader,
                 self.extract_version, self.reserved, flag_bits,
                 self.compress_type, dostime, dosdate, CRC,
                 compress_size, file_size,
                 len(filename), len(extra))
        return header + filename + extra

    def _encodeFilenameFlags(self):
        """Return (encoded filename, flag bits) for writing a header.

        ASCII is used when possible; otherwise the name is encoded as UTF-8
        and bit 0x800 is set in the returned flags.
        """
        try:
            return self.filename.encode('ascii'), self.flag_bits
        except UnicodeEncodeError:
            return self.filename.encode('utf-8'), self.flag_bits | 0x800

    def _decodeExtra(self):
        """Decode the extra field, applying any ZIP64 record (type 1).

        Fields holding the 0xffffffff placeholder are replaced by the 64-bit
        values stored in the record.  Raises RuntimeError when the ZIP64
        record has an unexpected length.
        """
        extra = self.extra
        unpack = struct.unpack
        # Every record starts with a 4 byte (type, length) header; fewer
        # than 4 remaining bytes cannot be a record and are ignored.
        while len(extra) >= 4:
            tp, ln = unpack('<HH', extra[:4])
            if tp == 1:
                if ln >= 24:
                    counts = unpack('<QQQ', extra[4:28])
                elif ln == 16:
                    counts = unpack('<QQ', extra[4:20])
                elif ln == 8:
                    counts = unpack('<Q', extra[4:12])
                elif ln == 0:
                    counts = ()
                else:
                    raise RuntimeError("Corrupt extra field %s"%(ln,))

                idx = 0

                # ZIP64 extension (large files and/or large archives)
                if self.file_size in (0xffffffffffffffff, 0xffffffff):
                    self.file_size = counts[idx]
                    idx += 1

                if self.compress_size == 0xFFFFFFFF:
                    self.compress_size = counts[idx]
                    idx += 1

                if self.header_offset == 0xffffffff:
                    self.header_offset = counts[idx]
                    idx += 1

            extra = extra[ln+4:]
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000388
389
class _ZipDecrypter:
    """Class to handle decryption of files stored within a ZIP archive.

    ZIP supports a password-based form of encryption. Even though known
    plaintext attacks have been found against it, it is still useful
    to be able to get data out of such a file.

    Usage:
        zd = _ZipDecrypter(mypwd)
        plain_char = zd(cypher_char)
        plain_text = map(zd, cypher_text)
    """

    def _GenerateCRCTable():
        """Generate a CRC-32 table.

        ZIP encryption uses the CRC32 one-byte primitive for scrambling some
        internal keys. We noticed that a direct implementation is faster than
        relying on binascii.crc32().
        """
        poly = 0xedb88320
        table = []
        for byte in range(256):
            crc = byte
            for _ in range(8):
                low_bit = crc & 1
                crc = (crc >> 1) & 0x7FFFFFFF
                if low_bit:
                    crc ^= poly
            table.append(crc)
        return table
    # Built once, at class creation time, and shared by all instances.
    crctable = _GenerateCRCTable()

    def _crc32(self, ch, crc):
        """Compute the CRC32 primitive on one byte."""
        index = (crc ^ ch) & 0xff
        shifted = (crc >> 8) & 0xffffff
        return shifted ^ self.crctable[index]

    def __init__(self, pwd):
        """Initialise the three keys and mix in each byte of *pwd*."""
        self.key0 = 0x12345678
        self.key1 = 0x23456789
        self.key2 = 0x34567890
        for p in pwd:
            self._UpdateKeys(p)

    def _UpdateKeys(self, c):
        """Advance the three keys using the plaintext byte *c*."""
        mask32 = 0xFFFFFFFF
        self.key0 = self._crc32(c, self.key0)
        key1 = (self.key1 + (self.key0 & 0xFF)) & mask32
        self.key1 = (key1 * 0x08088405 + 1) & mask32
        self.key2 = self._crc32((self.key1 >> 24) & 0xFF, self.key2)

    def __call__(self, c):
        """Decrypt a single character."""
        assert isinstance(c, int)
        k = self.key2 | 2
        keystream = ((k * (k ^ 1)) >> 8) & 0xFF
        c ^= keystream
        self._UpdateKeys(c)
        return c
447
class ZipExtFile:
    """File-like object for reading an archive member.
       Is returned by ZipFile.open().
    """

    def __init__(self, fileobj, zipinfo, decrypt=None):
        """Wrap *fileobj*, positioned at the member's data.

        zipinfo supplies compress_type, compress_size and filename; decrypt,
        when given, is a callable applied to every byte read.
        """
        self.fileobj = fileobj
        self.decrypter = decrypt
        self.bytes_read = 0
        self.rawbuffer = b''
        self.readbuffer = b''
        self.linebuffer = b''
        self.eof = False
        self.univ_newlines = False
        self.nlSeps = (b"\n", )
        self.lastdiscard = b''

        self.compress_type = zipinfo.compress_type
        self.compress_size = zipinfo.compress_size

        self.closed = False
        self.mode = "r"
        self.name = zipinfo.filename

        # read from compressed files in 64k blocks
        self.compreadsize = 64*1024
        if self.compress_type == ZIP_DEFLATED:
            self.dc = zlib.decompressobj(-15)

    def set_univ_newlines(self, univ_newlines):
        """Enable or disable universal newline handling for readline()."""
        self.univ_newlines = univ_newlines

        # pick line separator char(s) based on universal newlines flag
        self.nlSeps = (b"\n", )
        if self.univ_newlines:
            self.nlSeps = (b"\r\n", b"\r", b"\n")

    def __iter__(self):
        return self

    def __next__(self):
        """Return the next line, raising StopIteration at end of data."""
        nextline = self.readline()
        if not nextline:
            raise StopIteration()

        return nextline

    def close(self):
        """Mark the object as closed."""
        self.closed = True

    def _checkfornewline(self):
        """Return (position, length) of the first separator in linebuffer.

        (-1, -1) is returned when the buffer holds no separator.
        """
        nl, nllen = -1, -1
        if self.linebuffer:
            # ugly check for cases where half of an \r\n pair was
            # read on the last pass, and the \r was discarded.  In this
            # case we just throw away the \n at the start of the buffer.
            if (self.lastdiscard, self.linebuffer[:1]) == (b'\r', b'\n'):
                self.linebuffer = self.linebuffer[1:]

            for sep in self.nlSeps:
                nl = self.linebuffer.find(sep)
                if nl >= 0:
                    nllen = len(sep)
                    return nl, nllen

        return nl, nllen

    def readline(self, size = -1):
        """Read a line with approx. size. If size is negative,
           read a whole line.
        """
        if size < 0:
            size = sys.maxsize
        elif size == 0:
            return b''

        # check for a newline already in buffer
        nl, nllen = self._checkfornewline()

        if nl >= 0:
            # the next line was already in the buffer
            nl = min(nl, size)
        else:
            # no line break in buffer - try to read more
            size -= len(self.linebuffer)
            while nl < 0 and size > 0:
                buf = self.read(min(size, 100))
                if not buf:
                    break
                self.linebuffer += buf
                size -= len(buf)

                # check for a newline in buffer
                nl, nllen = self._checkfornewline()

            # we either ran out of bytes in the file, or
            # met the specified size limit without finding a newline,
            # so return current buffer
            if nl < 0:
                s = self.linebuffer
                self.linebuffer = b''
                return s

        buf = self.linebuffer[:nl]
        self.lastdiscard = self.linebuffer[nl:nl + nllen]
        self.linebuffer = self.linebuffer[nl + nllen:]

        # line is always returned with \n as newline char (except possibly
        # for a final incomplete line in the file, which is handled above).
        return buf + b"\n"

    def readlines(self, sizehint = -1):
        """Return a list with all (following) lines. The sizehint parameter
        is ignored in this implementation.
        """
        result = []
        while True:
            line = self.readline()
            if not line: break
            result.append(line)
        return result

    def read(self, size = None):
        """Read and return up to *size* bytes of member data.

        With size None or negative, everything currently obtainable is
        returned.  size == 0 returns an empty bytes object.
        """
        # act like file obj and return empty string if size is 0
        if size == 0:
            return b''

        # determine read size
        bytesToRead = self.compress_size - self.bytes_read

        # adjust read size for encrypted files since the first 12 bytes
        # are for the encryption/password information
        if self.decrypter is not None:
            bytesToRead -= 12

        if size is not None and size >= 0:
            if self.compress_type == ZIP_STORED:
                lr = len(self.readbuffer)
                bytesToRead = min(bytesToRead, size - lr)
            elif self.compress_type == ZIP_DEFLATED:
                if len(self.readbuffer) > size:
                    # the user has requested fewer bytes than we've already
                    # pulled through the decompressor; don't read any more
                    bytesToRead = 0
                else:
                    # user will use up the buffer, so read some more
                    lr = len(self.rawbuffer)
                    bytesToRead = min(bytesToRead, self.compreadsize - lr)

        # avoid reading past end of file contents
        if bytesToRead + self.bytes_read > self.compress_size:
            bytesToRead = self.compress_size - self.bytes_read

        # try to read from file (if necessary)
        if bytesToRead > 0:
            data = self.fileobj.read(bytesToRead)
            self.bytes_read += len(data)
            self.rawbuffer += data

        # handle contents of raw buffer
        if self.rawbuffer:
            newdata = self.rawbuffer
            self.rawbuffer = b''

            # decrypt new data if we were given an object to handle that
            if newdata and self.decrypter is not None:
                newdata = bytes(map(self.decrypter, newdata))

            # decompress newly read data if necessary
            if newdata and self.compress_type == ZIP_DEFLATED:
                newdata = self.dc.decompress(newdata)
                self.rawbuffer = self.dc.unconsumed_tail
                if self.eof and len(self.rawbuffer) == 0:
                    # we're out of raw bytes (both from the file and
                    # the local buffer); flush just to make sure the
                    # decompressor is done
                    newdata += self.dc.flush()
                    # prevent decompressor from being used again
                    self.dc = None

            self.readbuffer += newdata

        # return what the user asked for; a negative size means "no limit"
        # and must not be used as a slice bound
        if size is None or size < 0 or len(self.readbuffer) <= size:
            data = self.readbuffer
            self.readbuffer = b''
        else:
            data = self.readbuffer[:size]
            self.readbuffer = self.readbuffer[size:]

        return data
Guido van Rossumd8faa362007-04-27 19:54:29 +0000645
646
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000647class ZipFile:
Tim Petersa19a1682001-03-29 04:36:09 +0000648 """ Class with methods to open, read, write, close, list zip files.
649
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000650 z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=False)
Tim Petersa19a1682001-03-29 04:36:09 +0000651
Fred Drake3d9091e2001-03-26 15:49:24 +0000652 file: Either the path to the file, or a file-like object.
653 If it is a path, the file will be opened and closed by ZipFile.
654 mode: The mode can be either read "r", write "w" or append "a".
655 compression: ZIP_STORED (no compression) or ZIP_DEFLATED (requires zlib).
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000656 allowZip64: if True ZipFile will create files with ZIP64 extensions when
657 needed, otherwise it will raise an exception when this would
658 be necessary.
659
Fred Drake3d9091e2001-03-26 15:49:24 +0000660 """
Fred Drake484d7352000-10-02 21:14:52 +0000661
Fred Drake90eac282001-02-28 05:29:34 +0000662 fp = None # Set here since __del__ checks it
663
def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=False):
    """Open the ZIP file with mode read "r", write "w" or append "a".

    `file` is either a path (str) or an already opened file-like object.
    `compression` must be ZIP_STORED or ZIP_DEFLATED; the latter needs zlib.
    Raises RuntimeError for an unknown mode or compression method.
    """
    if mode not in ("r", "w", "a"):
        raise RuntimeError('ZipFile() requires mode "r", "w", or "a"')

    if compression == ZIP_DEFLATED:
        if not zlib:
            raise RuntimeError(
                "Compression requires the (missing) zlib module")
    elif compression != ZIP_STORED:
        raise RuntimeError("That compression method is not supported")

    self._allowZip64 = allowZip64
    self._didModify = False
    self.debug = 0                  # Level of printing: 0 through 3
    self.NameToInfo = {}            # Find file info given name
    self.filelist = []              # List of ZipInfo instances for archive
    self.compression = compression  # Method of compression
    self.mode = key = mode.replace('b', '')[0]
    self.pwd = None
    self.comment = b''

    if isinstance(file, str):
        # A filename was given: open the file ourselves.
        self._filePassed = 0
        self.filename = file
        modeDict = {'r': 'rb', 'w': 'wb', 'a': 'r+b'}
        try:
            self.fp = io.open(file, modeDict[mode])
        except IOError:
            if mode != 'a':
                raise
            # Appending to a file that cannot be opened for update:
            # fall back to creating it.
            mode = key = 'w'
            self.fp = io.open(file, modeDict[mode])
    else:
        # A file-like object was given: use it as is.
        self._filePassed = 1
        self.fp = file
        self.filename = getattr(file, 'name', None)

    if key == 'r':
        self._GetContents()
        return
    if key == 'w':
        return
    if key == 'a':
        try:
            # If the file already is a zip file, position at the start of
            # its central directory so new data overwrites it.
            self._RealGetContents()
            self.fp.seek(self.start_dir, 0)
        except BadZipfile:
            # Not a zip file: just append at the end.
            self.fp.seek(0, 2)
        return

    if not self._filePassed:
        self.fp.close()
        self.fp = None
    raise RuntimeError('Mode must be "r", "w" or "a"')
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000723
def _GetContents(self):
    """Read the central directory via _RealGetContents().

    If the archive turns out to be malformed (BadZipfile), a file that
    this object opened itself is closed before the error is re-raised.
    """
    try:
        self._RealGetContents()
    except BadZipfile:
        owns_file = not self._filePassed
        if owns_file:
            self.fp.close()
            self.fp = None
        raise
734
def _RealGetContents(self):
    """Read in the table of contents for the ZIP file.

    Fills self.filelist and self.NameToInfo with one ZipInfo per central
    directory entry and records self.comment and self.start_dir.
    Raises BadZipfile if no end-of-archive record is found or an entry
    has a bad signature.
    """
    archive = self.fp
    endrec = _EndRecData(archive)
    if not endrec:
        raise BadZipfile("File is not a zip file")
    if self.debug > 1:
        print(endrec)
    size_cd = endrec[_ECD_SIZE]             # bytes in central directory
    offset_cd = endrec[_ECD_OFFSET]         # offset of central directory
    self.comment = endrec[_ECD_COMMENT]     # archive comment

    # "concat" is zero, unless zip was concatenated to another file
    concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
    if endrec[_ECD_SIGNATURE] == stringEndArchive64:
        # If Zip64 extension structures are present, account for them
        concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)

    if self.debug > 2:
        inferred = concat + offset_cd
        print("given, inferred, offset", offset_cd, inferred, concat)

    # self.start_dir: Position of start of central directory
    self.start_dir = offset_cd + concat
    archive.seek(self.start_dir, 0)
    # Parse the directory from an in-memory copy.
    directory = io.BytesIO(archive.read(size_cd))
    total = 0
    while total < size_cd:
        raw = directory.read(sizeCentralDir)
        if raw[0:4] != stringCentralDir:
            raise BadZipfile("Bad magic number for central directory")
        centdir = struct.unpack(structCentralDir, raw)
        if self.debug > 2:
            print(centdir)

        name_len = centdir[_CD_FILENAME_LENGTH]
        extra_len = centdir[_CD_EXTRA_FIELD_LENGTH]
        comment_len = centdir[_CD_COMMENT_LENGTH]

        filename = directory.read(name_len)
        flags = centdir[5]
        # Bit 0x800 marks UTF-8 file names; otherwise the historical
        # ZIP encoding (cp437) is used.
        codec = 'utf-8' if flags & 0x800 else 'cp437'
        filename = filename.decode(codec)

        # Create ZipInfo instance to store file information
        x = ZipInfo(filename)
        x.extra = directory.read(extra_len)
        x.comment = directory.read(comment_len)
        x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
        (x.create_version, x.create_system, x.extract_version, x.reserved,
         x.flag_bits, x.compress_type, t, d,
         x.CRC, x.compress_size, x.file_size) = centdir[1:12]
        x.volume, x.internal_attr, x.external_attr = centdir[15:18]
        # Convert date/time code to (year, month, day, hour, min, sec)
        x._raw_time = t
        x.date_time = ((d >> 9) + 1980, (d >> 5) & 0xF, d & 0x1F,
                       t >> 11, (t >> 5) & 0x3F, (t & 0x1F) * 2)

        # Keep this order: the extra field is decoded before the offset
        # is shifted by "concat".
        x._decodeExtra()
        x.header_offset = x.header_offset + concat
        self.filelist.append(x)
        self.NameToInfo[x.filename] = x

        # update total bytes read from central directory
        total += sizeCentralDir + name_len + extra_len + comment_len

        if self.debug > 2:
            print("total", total)
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000803
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000804
def namelist(self):
    """Return a new list with the file name of every archive member."""
    return [entry.filename for entry in self.filelist]
811
def infolist(self):
    """Return the list of ZipInfo instances for the files in the archive.

    This is the archive's own list object (self.filelist), not a copy.
    """
    entries = self.filelist
    return entries
816
def printdir(self, file=None):
    """Print a table of contents for the zip file.

    One header line is printed, then one line per member with its name,
    modification date and size. `file` is passed on to print().
    """
    header = "%-46s %19s %12s" % ("File Name", "Modified ", "Size")
    print(header, file=file)
    for zinfo in self.filelist:
        stamp = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time[:6]
        row = "%-46s %s %12d" % (zinfo.filename, stamp, zinfo.file_size)
        print(row, file=file)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000825
def testzip(self):
    """Read all the files and check the CRC.

    Returns the name of the first member whose read raises BadZipfile,
    or None if every member could be read completely.
    """
    chunk_size = 2 ** 20
    for zinfo in self.filelist:
        try:
            # Read by chunks, to avoid an OverflowError or a
            # MemoryError with very large embedded files.
            member = self.open(zinfo.filename, "r")
            data = member.read(chunk_size)
            while data:     # Check CRC-32
                data = member.read(chunk_size)
        except BadZipfile:
            return zinfo.filename
    return None
838
def getinfo(self, name):
    """Return the instance of ZipInfo given 'name'.

    Raises KeyError if the archive has no member called 'name'.
    """
    found = self.NameToInfo.get(name)
    if found is not None:
        return found
    raise KeyError(
        'There is no item named %r in the archive' % name)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000847
def setpassword(self, pwd):
    """Set default password for encrypted files.

    `pwd` must be a bytes object; it is stored as self.pwd and used by
    open() when no explicit password is given.
    """
    is_bytes = isinstance(pwd, bytes)
    assert is_bytes
    self.pwd = pwd
852
def read(self, name, pwd=None):
    """Return the complete contents of member 'name'.

    The member is opened with self.open(name, "r", pwd) and read in one
    call.
    """
    member = self.open(name, "r", pwd)
    return member.read()
856
def open(self, name, mode="r", pwd=None):
    """Return file-like object for 'name'.

    'name' is a member name or a ZipInfo instance. 'mode' must be "r",
    "U" or "rU"; with "U", universal newlines are enabled on the
    returned object. 'pwd' overrides the default password for an
    encrypted member.

    Raises RuntimeError for a bad mode, a closed archive, a missing or
    wrong password; KeyError (from getinfo) for an unknown name;
    BadZipfile if the local file header is invalid or names a different
    file than the central directory.
    """
    if mode not in ("r", "U", "rU"):
        raise RuntimeError('open() requires mode "r", "U", or "rU"')
    if not self.fp:
        raise RuntimeError(
            "Attempt to read ZIP archive that was already closed")

    # Only open a new file for instances where we were not
    # given a file object in the constructor
    if self._filePassed:
        zef_file = self.fp
    else:
        zef_file = io.open(self.filename, 'rb')

    try:
        # Make sure we have an info object
        if isinstance(name, ZipInfo):
            zinfo = name
        else:
            zinfo = self.getinfo(name)

        zef_file.seek(zinfo.header_offset, 0)

        # Skip the file header:
        fheader = zef_file.read(sizeFileHeader)
        if fheader[0:4] != stringFileHeader:
            raise BadZipfile("Bad magic number for file header")

        fheader = struct.unpack(structFileHeader, fheader)
        fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
        if fheader[_FH_EXTRA_FIELD_LENGTH]:
            zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])

        # The directory name was decoded as UTF-8 when flag bit 0x800 is
        # set and as cp437 otherwise, so decode the header name the same
        # way before comparing.
        encoding = "utf-8" if zinfo.flag_bits & 0x800 else "cp437"
        try:
            names_match = fname.decode(encoding) == zinfo.orig_filename
        except UnicodeDecodeError:
            names_match = False
        # Still accept a plain UTF-8 byte match, which is what this check
        # accepted before, so nothing that used to open is now rejected.
        if not names_match and fname != zinfo.orig_filename.encode("utf-8"):
            raise BadZipfile(
                'File name in directory %r and header %r differ.'
                % (zinfo.orig_filename, fname))

        # check for encrypted flag & handle password
        is_encrypted = zinfo.flag_bits & 0x1
        zd = None
        if is_encrypted:
            if not pwd:
                pwd = self.pwd
            if not pwd:
                raise RuntimeError("File %s is encrypted, "
                                   "password required for extraction" % name)

            zd = _ZipDecrypter(pwd)
            # The first 12 bytes in the cypher stream is an encryption header
            # used to strengthen the algorithm. The first 11 bytes are
            # completely random, while the 12th contains the MSB of the CRC,
            # or the MSB of the file time depending on the header type
            # and is used to check the correctness of the password.
            enc_header = zef_file.read(12)
            h = [zd(b) for b in enc_header[0:12]]
            if zinfo.flag_bits & 0x8:
                # compare against the file time from extended local headers
                check_byte = (zinfo._raw_time >> 8) & 0xff
            else:
                # compare against the CRC otherwise
                check_byte = (zinfo.CRC >> 24) & 0xff
            if h[11] != check_byte:
                raise RuntimeError("Bad password for file", name)

        # build and return a ZipExtFile
        if zd is None:
            zef = ZipExtFile(zef_file, zinfo)
        else:
            zef = ZipExtFile(zef_file, zinfo, zd)

        # set universal newlines on ZipExtFile if necessary
        if "U" in mode:
            zef.set_univ_newlines(True)
        return zef
    except BaseException:
        # Do not leak the handle opened above when the member cannot be
        # opened; a caller-supplied file object is left untouched.
        if not self._filePassed:
            zef_file.close()
        raise
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000934
def extract(self, member, path=None, pwd=None):
    """Extract a member from the archive to the current working directory,
    using its full name. `member' may be a filename or a ZipInfo object.
    You can specify a different directory using `path'. Returns whatever
    _extract_member() returns for the member.
    """
    zinfo = member if isinstance(member, ZipInfo) else self.getinfo(member)
    destination = os.getcwd() if path is None else path
    return self._extract_member(zinfo, destination, pwd)
948
def extractall(self, path=None, members=None, pwd=None):
    """Extract all members from the archive to the current working
    directory. `path' specifies a different directory to extract to.
    `members' is optional and must be a subset of the list returned
    by namelist(). Each member is handed to extract() in order.
    """
    selected = self.namelist() if members is None else members
    for entry in selected:
        self.extract(entry, path, pwd)
960
def _extract_member(self, member, targetpath, pwd):
    """Extract the ZipInfo object 'member' to a physical
    file on the path targetpath.

    The member name is sanitized first: the drive, leading separators
    and any "." or ".." components are dropped, so the result always
    lies inside 'targetpath'. A member whose name ends with "/" is
    created as a directory. Returns the normalized path that was
    created.
    """
    # Archive names use "/"; convert to the platform separator.
    arcname = member.filename.replace('/', os.path.sep)
    if os.path.altsep:
        arcname = arcname.replace(os.path.altsep, os.path.sep)
    # Archive contents are untrusted: strip the drive and every empty,
    # "." or ".." component so the member cannot escape targetpath.
    arcname = os.path.splitdrive(arcname)[1]
    arcname = os.path.sep.join(
        part for part in arcname.split(os.path.sep)
        if part not in ('', os.path.curdir, os.path.pardir))

    targetpath = os.path.normpath(os.path.join(targetpath, arcname))

    # Create all upper directories if necessary.
    upperdirs = os.path.dirname(targetpath)
    if upperdirs and not os.path.exists(upperdirs):
        os.makedirs(upperdirs)

    # A directory entry has no data to copy; just make the directory.
    if member.filename[-1:] == '/':
        if not os.path.isdir(targetpath):
            os.mkdir(targetpath)
        return targetpath

    source = self.open(member, pwd=pwd)
    try:
        target = open(targetpath, "wb")
        try:
            shutil.copyfileobj(source, target)
        finally:
            target.close()
    finally:
        source.close()

    return targetpath
990
def _writecheck(self, zinfo):
    """Check for errors before writing a file to the archive.

    Raises RuntimeError if the archive is not writable, is closed, or
    the member's compression method is unsupported or unavailable;
    raises LargeZipFile if ZIP64 would be needed but is not allowed.
    A duplicate name only produces a message when self.debug is set.
    """
    if zinfo.filename in self.NameToInfo and self.debug:
        # Warning for duplicate names
        print("Duplicate name:", zinfo.filename)
    if self.mode not in ("w", "a"):
        raise RuntimeError('write() requires mode "w" or "a"')
    if not self.fp:
        raise RuntimeError(
            "Attempt to write ZIP archive that was already closed")

    method = zinfo.compress_type
    if method == ZIP_DEFLATED and not zlib:
        raise RuntimeError(
            "Compression requires the (missing) zlib module")
    if method not in (ZIP_STORED, ZIP_DEFLATED):
        raise RuntimeError("That compression method is not supported")

    if zinfo.file_size > ZIP64_LIMIT and not self._allowZip64:
        raise LargeZipFile("Filesize would require ZIP64 extensions")
    if zinfo.header_offset > ZIP64_LIMIT and not self._allowZip64:
        raise LargeZipFile(
            "Zipfile size would require ZIP64 extensions")
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001013
def write(self, filename, arcname=None, compress_type=None):
    """Put the bytes from filename into the archive under the name
    arcname.

    If arcname is None, filename is used. The drive and leading path
    separators are removed from the archive name. compress_type
    overrides the archive's default compression for this member.
    Raises RuntimeError if the archive is closed, plus whatever
    _writecheck() raises.
    """
    if not self.fp:
        raise RuntimeError(
            "Attempt to write to ZIP archive that was already closed")

    st = os.stat(filename)
    mtime = time.localtime(st.st_mtime)
    date_time = mtime[0:6]
    # Create ZipInfo instance to store file information
    if arcname is None:
        arcname = filename
    arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
    while arcname[0] in (os.sep, os.altsep):
        arcname = arcname[1:]
    zinfo = ZipInfo(arcname, date_time)
    zinfo.external_attr = (st[0] & 0xFFFF) << 16    # Unix attributes
    if compress_type is None:
        zinfo.compress_type = self.compression
    else:
        zinfo.compress_type = compress_type

    # The real size is only needed for the ZIP64 check in _writecheck();
    # it is reset below and recomputed while the data is copied.
    zinfo.file_size = st.st_size
    zinfo.flag_bits = 0x00
    zinfo.header_offset = self.fp.tell()    # Start of header bytes

    self._writecheck(zinfo)
    self._didModify = True
    # The context manager closes the source file even if reading,
    # compressing or writing fails part-way through.
    with io.open(filename, "rb") as fp:
        # Must overwrite CRC and sizes with correct data later
        zinfo.CRC = CRC = 0
        zinfo.compress_size = compress_size = 0
        zinfo.file_size = file_size = 0
        self.fp.write(zinfo.FileHeader())
        if zinfo.compress_type == ZIP_DEFLATED:
            cmpr = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                                    zlib.DEFLATED, -15)
        else:
            cmpr = None
        while 1:
            buf = fp.read(1024 * 8)
            if not buf:
                break
            file_size = file_size + len(buf)
            CRC = crc32(buf, CRC) & 0xffffffff
            if cmpr:
                buf = cmpr.compress(buf)
                compress_size = compress_size + len(buf)
            self.fp.write(buf)
    if cmpr:
        buf = cmpr.flush()
        compress_size = compress_size + len(buf)
        self.fp.write(buf)
        zinfo.compress_size = compress_size
    else:
        zinfo.compress_size = file_size
    zinfo.CRC = CRC
    zinfo.file_size = file_size
    # Seek backwards and write CRC and file sizes
    position = self.fp.tell()       # Preserve current position in file
    self.fp.seek(zinfo.header_offset + 14, 0)
    self.fp.write(struct.pack("<LLL", zinfo.CRC, zinfo.compress_size,
                              zinfo.file_size))
    self.fp.seek(position, 0)
    self.filelist.append(zinfo)
    self.NameToInfo[zinfo.filename] = zinfo
1082
def writestr(self, zinfo_or_arcname, data):
    """Write a file into the archive. The contents is 'data', which
    may be either a 'str' or a 'bytes' instance; if it is a 'str',
    it is encoded as UTF-8 first.
    'zinfo_or_arcname' is either a ZipInfo instance or
    the name of the file in the archive.

    Raises RuntimeError if the archive is closed, plus whatever
    _writecheck() raises.
    """
    if isinstance(data, str):
        data = data.encode("utf-8")
    if not isinstance(zinfo_or_arcname, ZipInfo):
        zinfo = ZipInfo(filename=zinfo_or_arcname,
                        date_time=time.localtime(time.time())[:6])
        zinfo.compress_type = self.compression
        zinfo.external_attr = 0o600 << 16
    else:
        zinfo = zinfo_or_arcname

    if not self.fp:
        raise RuntimeError(
            "Attempt to write to ZIP archive that was already closed")

    zinfo.file_size = len(data)             # Uncompressed size
    zinfo.header_offset = self.fp.tell()    # Start of header data
    self._writecheck(zinfo)
    self._didModify = True
    zinfo.CRC = crc32(data) & 0xffffffff    # CRC-32 checksum
    if zinfo.compress_type == ZIP_DEFLATED:
        co = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                              zlib.DEFLATED, -15)
        data = co.compress(data) + co.flush()
        zinfo.compress_size = len(data)     # Compressed size
    else:
        zinfo.compress_size = zinfo.file_size
    self.fp.write(zinfo.FileHeader())
    self.fp.write(data)
    self.fp.flush()
    if zinfo.flag_bits & 0x08:
        # Write CRC and file sizes after the file data.
        # The CRC was masked to an unsigned 32-bit value above, so it
        # must be packed unsigned ("L"); a signed "l" raises
        # struct.error for any CRC >= 2**31.
        self.fp.write(struct.pack("<LLL", zinfo.CRC, zinfo.compress_size,
                                  zinfo.file_size))
    self.filelist.append(zinfo)
    self.NameToInfo[zinfo.filename] = zinfo
1125
def __del__(self):
    """Finalizer: call close() in case the user forgot to."""
    closer = self.close
    closer()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001129
def close(self):
    """Close the file, and for mode "w" and "a" write the ending
    records.

    Does nothing if the archive is already closed. If the archive was
    modified, the central directory, the ZIP64 end records when needed,
    and the end-of-archive record are written first. The file is
    closed (when this object opened it) and self.fp is reset to None
    even if writing those records fails.
    """
    if self.fp is None:
        return

    try:
        if self.mode in ("w", "a") and self._didModify: # write ending records
            count = 0
            pos1 = self.fp.tell()
            for zinfo in self.filelist:         # write central directory
                count = count + 1
                dt = zinfo.date_time
                dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
                dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
                extra = []
                if zinfo.file_size > ZIP64_LIMIT \
                        or zinfo.compress_size > ZIP64_LIMIT:
                    extra.append(zinfo.file_size)
                    extra.append(zinfo.compress_size)
                    file_size = 0xffffffff
                    compress_size = 0xffffffff
                else:
                    file_size = zinfo.file_size
                    compress_size = zinfo.compress_size

                if zinfo.header_offset > ZIP64_LIMIT:
                    extra.append(zinfo.header_offset)
                    header_offset = 0xffffffff
                else:
                    header_offset = zinfo.header_offset

                extra_data = zinfo.extra
                if extra:
                    # Append a ZIP64 field to the extra's
                    extra_data = struct.pack(
                        '<HH' + 'Q'*len(extra),
                        1, 8*len(extra), *extra) + extra_data

                    extract_version = max(45, zinfo.extract_version)
                    create_version = max(45, zinfo.create_version)
                else:
                    extract_version = zinfo.extract_version
                    create_version = zinfo.create_version

                try:
                    filename, flag_bits = zinfo._encodeFilenameFlags()
                    centdir = struct.pack(structCentralDir,
                        stringCentralDir, create_version,
                        zinfo.create_system, extract_version, zinfo.reserved,
                        flag_bits, zinfo.compress_type, dostime, dosdate,
                        zinfo.CRC, compress_size, file_size,
                        len(filename), len(extra_data), len(zinfo.comment),
                        0, zinfo.internal_attr, zinfo.external_attr,
                        header_offset)
                except DeprecationWarning:
                    # Dump the values that could not be packed, then
                    # re-raise. This uses the print() function; the old
                    # "print >>sys.stderr" form is a TypeError in Python 3.
                    print((structCentralDir,
                        stringCentralDir, create_version,
                        zinfo.create_system, extract_version, zinfo.reserved,
                        zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
                        zinfo.CRC, compress_size, file_size,
                        len(zinfo.filename), len(extra_data), len(zinfo.comment),
                        0, zinfo.internal_attr, zinfo.external_attr,
                        header_offset), file=sys.stderr)
                    raise
                self.fp.write(centdir)
                self.fp.write(filename)
                self.fp.write(extra_data)
                self.fp.write(zinfo.comment)

            pos2 = self.fp.tell()
            # Write end-of-zip-archive record
            centDirCount = count
            centDirSize = pos2 - pos1
            centDirOffset = pos1
            if (centDirCount >= ZIP_FILECOUNT_LIMIT or
                centDirOffset > ZIP64_LIMIT or
                centDirSize > ZIP64_LIMIT):
                # Need to write the ZIP64 end-of-archive records
                zip64endrec = struct.pack(
                    structEndArchive64, stringEndArchive64,
                    44, 45, 45, 0, 0, centDirCount, centDirCount,
                    centDirSize, centDirOffset)
                self.fp.write(zip64endrec)

                zip64locrec = struct.pack(
                    structEndArchive64Locator,
                    stringEndArchive64Locator, 0, pos2, 1)
                self.fp.write(zip64locrec)
                # The classic end record can only hold the capped values.
                centDirCount = min(centDirCount, 0xFFFF)
                centDirSize = min(centDirSize, 0xFFFFFFFF)
                centDirOffset = min(centDirOffset, 0xFFFFFFFF)

            # check for valid comment length
            if len(self.comment) > ZIP_MAX_COMMENT:
                if self.debug > 0:
                    msg = 'Archive comment is too long; truncating to %d bytes' \
                          % ZIP_MAX_COMMENT
                    print(msg)
                self.comment = self.comment[:ZIP_MAX_COMMENT]

            endrec = struct.pack(structEndArchive, stringEndArchive,
                                 0, 0, centDirCount, centDirCount,
                                 centDirSize, centDirOffset, len(self.comment))
            self.fp.write(endrec)
            self.fp.write(self.comment)
            self.fp.flush()
    finally:
        # Always release the file, so a failed write is not retried by
        # __del__ and the handle does not leak.
        fp = self.fp
        self.fp = None
        if not self._filePassed:
            fp.close()
1239
1240
1241class PyZipFile(ZipFile):
Fred Drake484d7352000-10-02 21:14:52 +00001242 """Class to create ZIP archives with Python library files and packages."""
1243
def writepy(self, pathname, basename = ""):
    """Add all files from "pathname" to the ZIP archive.

    If pathname is a package directory (it contains __init__.py), the
    directory and its package subdirectories are searched recursively
    for *.py files, which are added under the package name. If it is a
    plain directory, its *.py files are added at top level. Otherwise
    pathname must be a *.py file, which is added as a single module.
    The file actually written for each module is the one returned by
    _get_codename(). Raises RuntimeError for a file not ending in ".py".
    """
    def add_module(module_path, label):
        # Resolve the file to store for one module and write it.
        # "basename" is looked up at call time, so a package prefix
        # assigned below is picked up.
        fname, arcname = self._get_codename(module_path, basename)
        if self.debug:
            print(label, arcname)
        self.write(fname, arcname)

    dir, name = os.path.split(pathname)

    if not os.path.isdir(pathname):
        # A single module file.
        if pathname[-3:] != ".py":
            raise RuntimeError(
                'Files added with writepy() must end with ".py"')
        add_module(pathname[0:-3], "Adding file")
        return

    initname = os.path.join(pathname, "__init__.py")
    if not os.path.isfile(initname):
        # This is NOT a package directory, add its files at top level
        if self.debug:
            print("Adding files from directory", pathname)
        for filename in os.listdir(pathname):
            path = os.path.join(pathname, filename)
            root, ext = os.path.splitext(filename)
            if ext == ".py":
                add_module(path[0:-3], "Adding")
        return

    # This is a package directory, add it
    if basename:
        basename = "%s/%s" % (basename, name)
    else:
        basename = name
    if self.debug:
        print("Adding package in", pathname, "as", basename)
    add_module(initname[0:-3], "Adding")
    dirlist = os.listdir(pathname)
    dirlist.remove("__init__.py")
    # Add all *.py files and package subdirectories
    for filename in dirlist:
        path = os.path.join(pathname, filename)
        root, ext = os.path.splitext(filename)
        if os.path.isdir(path):
            if os.path.isfile(os.path.join(path, "__init__.py")):
                # This is a package directory, add it
                self.writepy(path, basename)    # Recursive call
        elif ext == ".py":
            add_module(path[0:-3], "Adding")
1308
1309 def _get_codename(self, pathname, basename):
1310 """Return (filename, archivename) for the path.
1311
Fred Drake484d7352000-10-02 21:14:52 +00001312 Given a module name path, return the correct file path and
1313 archive name, compiling if necessary. For example, given
1314 /python/lib/string, return (/python/lib/string.pyc, string).
1315 """
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001316 file_py = pathname + ".py"
1317 file_pyc = pathname + ".pyc"
1318 file_pyo = pathname + ".pyo"
1319 if os.path.isfile(file_pyo) and \
Raymond Hettinger32200ae2002-06-01 19:51:15 +00001320 os.stat(file_pyo).st_mtime >= os.stat(file_py).st_mtime:
Tim Peterse1190062001-01-15 03:34:38 +00001321 fname = file_pyo # Use .pyo file
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001322 elif not os.path.isfile(file_pyc) or \
Raymond Hettinger32200ae2002-06-01 19:51:15 +00001323 os.stat(file_pyc).st_mtime < os.stat(file_py).st_mtime:
Fred Drake484d7352000-10-02 21:14:52 +00001324 import py_compile
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001325 if self.debug:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001326 print("Compiling", file_py)
Martin v. Löwis0c6774d2003-01-15 11:51:06 +00001327 try:
1328 py_compile.compile(file_py, file_pyc, None, True)
Guido van Rossumb940e112007-01-10 16:19:56 +00001329 except py_compile.PyCompileError as err:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001330 print(err.msg)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001331 fname = file_pyc
1332 else:
1333 fname = file_pyc
1334 archivename = os.path.split(fname)[1]
1335 if basename:
1336 archivename = "%s/%s" % (basename, archivename)
1337 return (fname, archivename)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001338
1339
def main(args = None):
    """Run the command-line interface of this module.

    args is the list of command-line arguments without the program name;
    when it is None, sys.argv[1:] is used.  The first argument selects
    the action:

        -l zipfile            list the contents of the archive
        -t zipfile            test the archive and report a corrupt member
        -e zipfile target     extract the archive into directory target
        -c zipfile src ...    create the archive from files/directories

    The usage text is printed and the process exits with status 1 when
    the action is unknown or the number of arguments does not match.
    The archive is always closed, even when an action fails.
    """
    import textwrap
    USAGE=textwrap.dedent("""\
        Usage:
            zipfile.py -l zipfile.zip        # Show listing of a zipfile
            zipfile.py -t zipfile.zip        # Test if a zipfile is valid
            zipfile.py -e zipfile.zip target # Extract zipfile into target dir
            zipfile.py -c zipfile.zip src ... # Create zipfile from sources
        """)
    if args is None:
        args = sys.argv[1:]

    if not args or args[0] not in ('-l', '-c', '-e', '-t'):
        print(USAGE)
        sys.exit(1)

    if args[0] == '-l':
        if len(args) != 2:
            print(USAGE)
            sys.exit(1)
        zf = ZipFile(args[1], 'r')
        try:
            zf.printdir()
        finally:
            zf.close()

    elif args[0] == '-t':
        if len(args) != 2:
            print(USAGE)
            sys.exit(1)
        zf = ZipFile(args[1], 'r')
        try:
            # testzip() is documented to return the name of the first
            # corrupt member, or None when every member checks out.
            badfile = zf.testzip()
        finally:
            zf.close()
        if badfile:
            print("The following enclosed file is corrupted: %r" % (badfile,))
        print("Done testing")

    elif args[0] == '-e':
        if len(args) != 3:
            print(USAGE)
            sys.exit(1)

        zf = ZipFile(args[1], 'r')
        try:
            out = args[2]
            # NOTE(review): member names are joined to the target directory
            # without sanitising; absolute names or ".." components in an
            # untrusted archive can place files outside of it.
            for path in zf.namelist():
                if path.startswith('./'):
                    tgt = os.path.join(out, path[2:])
                else:
                    tgt = os.path.join(out, path)

                tgtdir = os.path.dirname(tgt)
                # tgtdir is empty when the target has no directory part;
                # os.makedirs('') would fail in that case.
                if tgtdir and not os.path.exists(tgtdir):
                    os.makedirs(tgtdir)
                if path.endswith('/'):
                    # Directory entry: the directory was created above and
                    # there is no file content to write.
                    continue
                with io.open(tgt, 'wb') as fp:
                    fp.write(zf.read(path))
        finally:
            zf.close()

    elif args[0] == '-c':
        if len(args) < 3:
            print(USAGE)
            sys.exit(1)

        def addToZip(zf, path, zippath):
            # Add a file, or recursively every file below a directory;
            # anything else (missing path, special file) is ignored.
            if os.path.isfile(path):
                zf.write(path, zippath, ZIP_DEFLATED)
            elif os.path.isdir(path):
                for nm in os.listdir(path):
                    addToZip(zf,
                            os.path.join(path, nm), os.path.join(zippath, nm))
            # else: ignore

        zf = ZipFile(args[1], 'w', allowZip64=True)
        try:
            for src in args[2:]:
                addToZip(zf, src, os.path.basename(src))
        finally:
            zf.close()
1412
# Script entry point: hand the command-line arguments (without the program
# name) to main() when the module is executed directly.
if __name__ == "__main__":
    main(sys.argv[1:])