blob: 0cfa37e88109fa5478e28e0edca8c3407f2cbcfe [file] [log] [blame]
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001"""
2Read and write ZIP files.
Guido van Rossumd6ca5462007-05-22 01:29:33 +00003
4XXX references to utf-8 need further investigation.
Thomas Wouters0e3f5912006-08-11 14:57:12 +00005"""
Christian Heimes790c8232008-01-07 21:14:23 +00006import struct, os, time, sys, shutil
Martin v. Löwis59e47792009-01-24 14:10:07 +00007import binascii, io, stat
Guido van Rossum32abe6f2000-03-31 17:30:02 +00008
9try:
Tim Peterse1190062001-01-15 03:34:38 +000010 import zlib # We may need its compression method
Christian Heimesd5e2b6f2008-03-19 21:50:51 +000011 crc32 = zlib.crc32
Guido van Rossum9c673f32001-04-10 15:37:12 +000012except ImportError:
Guido van Rossum32abe6f2000-03-31 17:30:02 +000013 zlib = None
Christian Heimesd5e2b6f2008-03-19 21:50:51 +000014 crc32 = binascii.crc32
Guido van Rossum32abe6f2000-03-31 17:30:02 +000015
Skip Montanaro40fc1602001-03-01 04:27:19 +000016__all__ = ["BadZipfile", "error", "ZIP_STORED", "ZIP_DEFLATED", "is_zipfile",
Thomas Wouters0e3f5912006-08-11 14:57:12 +000017 "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile" ]
Skip Montanaro40fc1602001-03-01 04:27:19 +000018
Fred Drake5db246d2000-09-29 20:44:48 +000019class BadZipfile(Exception):
Guido van Rossum32abe6f2000-03-31 17:30:02 +000020 pass
Thomas Wouters0e3f5912006-08-11 14:57:12 +000021
22
23class LargeZipFile(Exception):
24 """
25 Raised when writing a zipfile, the zipfile requires ZIP64 extensions
26 and those extensions are disabled.
27 """
28
Tim Peterse1190062001-01-15 03:34:38 +000029error = BadZipfile # The exception raised by this module
Guido van Rossum32abe6f2000-03-31 17:30:02 +000030
Amaury Forgeot d'Arc0c3f8a42009-01-17 16:42:26 +000031ZIP64_LIMIT = (1 << 31) - 1
Martin v. Löwisb09b8442008-07-03 14:13:42 +000032ZIP_FILECOUNT_LIMIT = 1 << 16
33ZIP_MAX_COMMENT = (1 << 16) - 1
Thomas Wouters0e3f5912006-08-11 14:57:12 +000034
Guido van Rossum32abe6f2000-03-31 17:30:02 +000035# constants for Zip file compression methods
36ZIP_STORED = 0
37ZIP_DEFLATED = 8
38# Other ZIP compression methods not supported
39
Martin v. Löwisb09b8442008-07-03 14:13:42 +000040# Below are some formats and associated data for reading/writing headers using
41# the struct module. The names and structures of headers/records are those used
42# in the PKWARE description of the ZIP file format:
43# http://www.pkware.com/documents/casestudies/APPNOTE.TXT
44# (URL valid as of January 2008)
Thomas Wouters0e3f5912006-08-11 14:57:12 +000045
Martin v. Löwisb09b8442008-07-03 14:13:42 +000046# The "end of central directory" structure, magic number, size, and indices
47# (section V.I in the format document)
Georg Brandl2ee470f2008-07-16 12:55:28 +000048structEndArchive = b"<4s4H2LH"
49stringEndArchive = b"PK\005\006"
50sizeEndCentDir = struct.calcsize(structEndArchive)
Martin v. Löwisb09b8442008-07-03 14:13:42 +000051
52_ECD_SIGNATURE = 0
53_ECD_DISK_NUMBER = 1
54_ECD_DISK_START = 2
55_ECD_ENTRIES_THIS_DISK = 3
56_ECD_ENTRIES_TOTAL = 4
57_ECD_SIZE = 5
58_ECD_OFFSET = 6
59_ECD_COMMENT_SIZE = 7
60# These last two indices are not part of the structure as defined in the
61# spec, but they are used internally by this module as a convenience
62_ECD_COMMENT = 8
63_ECD_LOCATION = 9
64
65# The "central directory" structure, magic number, size, and indices
66# of entries in the structure (section V.F in the format document)
67structCentralDir = "<4s4B4HL2L5H2L"
Georg Brandl2ee470f2008-07-16 12:55:28 +000068stringCentralDir = b"PK\001\002"
Martin v. Löwisb09b8442008-07-03 14:13:42 +000069sizeCentralDir = struct.calcsize(structCentralDir)
70
Fred Drake3e038e52001-02-28 17:56:26 +000071# indexes of entries in the central directory structure
72_CD_SIGNATURE = 0
73_CD_CREATE_VERSION = 1
74_CD_CREATE_SYSTEM = 2
75_CD_EXTRACT_VERSION = 3
Martin v. Löwisb09b8442008-07-03 14:13:42 +000076_CD_EXTRACT_SYSTEM = 4
Fred Drake3e038e52001-02-28 17:56:26 +000077_CD_FLAG_BITS = 5
78_CD_COMPRESS_TYPE = 6
79_CD_TIME = 7
80_CD_DATE = 8
81_CD_CRC = 9
82_CD_COMPRESSED_SIZE = 10
83_CD_UNCOMPRESSED_SIZE = 11
84_CD_FILENAME_LENGTH = 12
85_CD_EXTRA_FIELD_LENGTH = 13
86_CD_COMMENT_LENGTH = 14
87_CD_DISK_NUMBER_START = 15
88_CD_INTERNAL_FILE_ATTRIBUTES = 16
89_CD_EXTERNAL_FILE_ATTRIBUTES = 17
90_CD_LOCAL_HEADER_OFFSET = 18
91
Martin v. Löwisb09b8442008-07-03 14:13:42 +000092# The "local file header" structure, magic number, size, and indices
93# (section V.A in the format document)
94structFileHeader = "<4s2B4HL2L2H"
Georg Brandl2ee470f2008-07-16 12:55:28 +000095stringFileHeader = b"PK\003\004"
Martin v. Löwisb09b8442008-07-03 14:13:42 +000096sizeFileHeader = struct.calcsize(structFileHeader)
97
Fred Drake3e038e52001-02-28 17:56:26 +000098_FH_SIGNATURE = 0
99_FH_EXTRACT_VERSION = 1
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000100_FH_EXTRACT_SYSTEM = 2
Fred Drake3e038e52001-02-28 17:56:26 +0000101_FH_GENERAL_PURPOSE_FLAG_BITS = 3
102_FH_COMPRESSION_METHOD = 4
103_FH_LAST_MOD_TIME = 5
104_FH_LAST_MOD_DATE = 6
105_FH_CRC = 7
106_FH_COMPRESSED_SIZE = 8
107_FH_UNCOMPRESSED_SIZE = 9
108_FH_FILENAME_LENGTH = 10
109_FH_EXTRA_FIELD_LENGTH = 11
110
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000111# The "Zip64 end of central directory locator" structure, magic number, and size
Georg Brandl2ee470f2008-07-16 12:55:28 +0000112structEndArchive64Locator = "<4sLQL"
113stringEndArchive64Locator = b"PK\x06\x07"
114sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator)
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000115
116# The "Zip64 end of central directory" record, magic number, size, and indices
117# (section V.G in the format document)
Georg Brandl2ee470f2008-07-16 12:55:28 +0000118structEndArchive64 = "<4sQ2H2L4Q"
119stringEndArchive64 = b"PK\x06\x06"
120sizeEndCentDir64 = struct.calcsize(structEndArchive64)
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000121
122_CD64_SIGNATURE = 0
123_CD64_DIRECTORY_RECSIZE = 1
124_CD64_CREATE_VERSION = 2
125_CD64_EXTRACT_VERSION = 3
126_CD64_DISK_NUMBER = 4
127_CD64_DISK_NUMBER_START = 5
128_CD64_NUMBER_ENTRIES_THIS_DISK = 6
129_CD64_NUMBER_ENTRIES_TOTAL = 7
130_CD64_DIRECTORY_SIZE = 8
131_CD64_OFFSET_START_CENTDIR = 9
132
Antoine Pitroudb5fe662008-12-27 15:50:40 +0000133def _check_zipfile(fp):
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000134 try:
Antoine Pitroudb5fe662008-12-27 15:50:40 +0000135 if _EndRecData(fp):
136 return True # file has correct magic number
Fred Drake7e473802001-05-11 19:52:57 +0000137 except IOError:
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000138 pass
Guido van Rossum8ca162f2002-04-07 06:36:23 +0000139 return False
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000140
Antoine Pitroudb5fe662008-12-27 15:50:40 +0000141def is_zipfile(filename):
142 """Quickly see if a file is a ZIP file by checking the magic number.
143
144 The filename argument may be a file or file-like object too.
145 """
146 result = False
147 try:
148 if hasattr(filename, "read"):
149 result = _check_zipfile(fp=filename)
150 else:
151 with open(filename, "rb") as fp:
152 result = _check_zipfile(fp)
153 except IOError:
154 pass
155 return result
156
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000157def _EndRecData64(fpin, offset, endrec):
158 """
159 Read the ZIP64 end-of-archive records and use that to update endrec
160 """
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000161 fpin.seek(offset - sizeEndCentDir64Locator, 2)
162 data = fpin.read(sizeEndCentDir64Locator)
Georg Brandl2ee470f2008-07-16 12:55:28 +0000163 sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, data)
164 if sig != stringEndArchive64Locator:
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000165 return endrec
166
167 if diskno != 0 or disks != 1:
168 raise BadZipfile("zipfiles that span multiple disks are not supported")
169
170 # Assume no 'zip64 extensible data'
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000171 fpin.seek(offset - sizeEndCentDir64Locator - sizeEndCentDir64, 2)
172 data = fpin.read(sizeEndCentDir64)
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000173 sig, sz, create_version, read_version, disk_num, disk_dir, \
174 dircount, dircount2, dirsize, diroffset = \
Georg Brandl2ee470f2008-07-16 12:55:28 +0000175 struct.unpack(structEndArchive64, data)
176 if sig != stringEndArchive64:
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000177 return endrec
178
179 # Update the original endrec using data from the ZIP64 record
Antoine Pitrou9e4fdf42008-09-05 23:43:02 +0000180 endrec[_ECD_SIGNATURE] = sig
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000181 endrec[_ECD_DISK_NUMBER] = disk_num
182 endrec[_ECD_DISK_START] = disk_dir
183 endrec[_ECD_ENTRIES_THIS_DISK] = dircount
184 endrec[_ECD_ENTRIES_TOTAL] = dircount2
185 endrec[_ECD_SIZE] = dirsize
186 endrec[_ECD_OFFSET] = diroffset
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000187 return endrec
188
189
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000190def _EndRecData(fpin):
191 """Return data from the "End of Central Directory" record, or None.
192
193 The data is a list of the nine items in the ZIP "End of central dir"
194 record followed by a tenth item, the file seek offset of this record."""
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000195
196 # Determine file size
197 fpin.seek(0, 2)
198 filesize = fpin.tell()
199
200 # Check to see if this is ZIP file with no archive comment (the
201 # "end of central directory" structure should be the last item in the
202 # file if this is the case).
Amaury Forgeot d'Arcbc347802009-07-28 22:18:57 +0000203 try:
204 fpin.seek(-sizeEndCentDir, 2)
205 except IOError:
206 return None
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000207 data = fpin.read()
Georg Brandl2ee470f2008-07-16 12:55:28 +0000208 if data[0:4] == stringEndArchive and data[-2:] == b"\000\000":
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000209 # the signature is correct and there's no comment, unpack structure
Georg Brandl2ee470f2008-07-16 12:55:28 +0000210 endrec = struct.unpack(structEndArchive, data)
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000211 endrec=list(endrec)
212
213 # Append a blank comment and record start offset
214 endrec.append(b"")
215 endrec.append(filesize - sizeEndCentDir)
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000216
Amaury Forgeot d'Arcd3fb4bb2009-01-18 00:29:02 +0000217 # Try to read the "Zip64 end of central directory" structure
218 return _EndRecData64(fpin, -sizeEndCentDir, endrec)
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000219
220 # Either this is not a ZIP file, or it is a ZIP file with an archive
221 # comment. Search the end of the file for the "end of central directory"
222 # record signature. The comment is the last item in the ZIP file and may be
223 # up to 64K long. It is assumed that the "end of central directory" magic
224 # number does not appear in the comment.
225 maxCommentStart = max(filesize - (1 << 16) - sizeEndCentDir, 0)
226 fpin.seek(maxCommentStart, 0)
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000227 data = fpin.read()
Georg Brandl2ee470f2008-07-16 12:55:28 +0000228 start = data.rfind(stringEndArchive)
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000229 if start >= 0:
230 # found the magic number; attempt to unpack and interpret
231 recData = data[start:start+sizeEndCentDir]
Georg Brandl2ee470f2008-07-16 12:55:28 +0000232 endrec = list(struct.unpack(structEndArchive, recData))
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000233 comment = data[start+sizeEndCentDir:]
234 # check that comment length is correct
235 if endrec[_ECD_COMMENT_SIZE] == len(comment):
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000236 # Append the archive comment and start offset
237 endrec.append(comment)
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000238 endrec.append(maxCommentStart + start)
Amaury Forgeot d'Arcd3fb4bb2009-01-18 00:29:02 +0000239
240 # Try to read the "Zip64 end of central directory" structure
241 return _EndRecData64(fpin, maxCommentStart + start - filesize,
242 endrec)
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000243
244 # Unable to find a valid end of central directory structure
245 return
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000246
Fred Drake484d7352000-10-02 21:14:52 +0000247
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000248class ZipInfo (object):
Fred Drake484d7352000-10-02 21:14:52 +0000249 """Class with attributes describing each file in the ZIP archive."""
250
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000251 __slots__ = (
252 'orig_filename',
253 'filename',
254 'date_time',
255 'compress_type',
256 'comment',
257 'extra',
258 'create_system',
259 'create_version',
260 'extract_version',
261 'reserved',
262 'flag_bits',
263 'volume',
264 'internal_attr',
265 'external_attr',
266 'header_offset',
267 'CRC',
268 'compress_size',
269 'file_size',
Christian Heimesfdab48e2008-01-20 09:06:41 +0000270 '_raw_time',
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000271 )
272
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000273 def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
Greg Ward8e36d282003-06-18 00:53:06 +0000274 self.orig_filename = filename # Original file name in archive
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000275
276 # Terminate the file name at the first null byte. Null bytes in file
277 # names are used as tricks by viruses in archives.
Greg Ward8e36d282003-06-18 00:53:06 +0000278 null_byte = filename.find(chr(0))
279 if null_byte >= 0:
280 filename = filename[0:null_byte]
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000281 # This is used to ensure paths in generated ZIP files always use
282 # forward slashes as the directory separator, as required by the
283 # ZIP format specification.
284 if os.sep != "/" and os.sep in filename:
Greg Ward8e36d282003-06-18 00:53:06 +0000285 filename = filename.replace(os.sep, "/")
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000286
Greg Ward8e36d282003-06-18 00:53:06 +0000287 self.filename = filename # Normalized file name
Tim Peterse1190062001-01-15 03:34:38 +0000288 self.date_time = date_time # year, month, day, hour, min, sec
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000289 # Standard values:
Tim Peterse1190062001-01-15 03:34:38 +0000290 self.compress_type = ZIP_STORED # Type of compression for the file
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000291 self.comment = b"" # Comment for each file
292 self.extra = b"" # ZIP extra data
Martin v. Löwis00756902006-02-05 17:09:41 +0000293 if sys.platform == 'win32':
294 self.create_system = 0 # System which created ZIP archive
295 else:
296 # Assume everything else is unix-y
297 self.create_system = 3 # System which created ZIP archive
Tim Peterse1190062001-01-15 03:34:38 +0000298 self.create_version = 20 # Version which created ZIP archive
299 self.extract_version = 20 # Version needed to extract archive
300 self.reserved = 0 # Must be zero
301 self.flag_bits = 0 # ZIP flag bits
302 self.volume = 0 # Volume number of file header
303 self.internal_attr = 0 # Internal attributes
304 self.external_attr = 0 # External file attributes
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000305 # Other attributes are set by class ZipFile:
Tim Peterse1190062001-01-15 03:34:38 +0000306 # header_offset Byte offset to the file header
Tim Peterse1190062001-01-15 03:34:38 +0000307 # CRC CRC-32 of the uncompressed file
308 # compress_size Size of the compressed file
309 # file_size Size of the uncompressed file
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000310
311 def FileHeader(self):
Fred Drake484d7352000-10-02 21:14:52 +0000312 """Return the per-file header as a string."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000313 dt = self.date_time
314 dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
Tim Peters3caca232001-12-06 06:23:26 +0000315 dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000316 if self.flag_bits & 0x08:
Tim Peterse1190062001-01-15 03:34:38 +0000317 # Set these to zero because we write them after the file data
318 CRC = compress_size = file_size = 0
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000319 else:
Tim Peterse1190062001-01-15 03:34:38 +0000320 CRC = self.CRC
321 compress_size = self.compress_size
322 file_size = self.file_size
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000323
324 extra = self.extra
325
326 if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
327 # File is larger than what fits into a 4 byte integer,
328 # fall back to the ZIP64 extension
Christian Heimesd5e2b6f2008-03-19 21:50:51 +0000329 fmt = '<HHQQ'
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000330 extra = extra + struct.pack(fmt,
331 1, struct.calcsize(fmt)-4, file_size, compress_size)
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000332 file_size = 0xffffffff
333 compress_size = 0xffffffff
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000334 self.extract_version = max(45, self.extract_version)
335 self.create_version = max(45, self.extract_version)
336
Martin v. Löwis8570f6a2008-05-05 17:44:38 +0000337 filename, flag_bits = self._encodeFilenameFlags()
Georg Brandl2ee470f2008-07-16 12:55:28 +0000338 header = struct.pack(structFileHeader, stringFileHeader,
Martin v. Löwis8570f6a2008-05-05 17:44:38 +0000339 self.extract_version, self.reserved, flag_bits,
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000340 self.compress_type, dostime, dosdate, CRC,
341 compress_size, file_size,
Martin v. Löwis8570f6a2008-05-05 17:44:38 +0000342 len(filename), len(extra))
343 return header + filename + extra
344
345 def _encodeFilenameFlags(self):
346 try:
347 return self.filename.encode('ascii'), self.flag_bits
348 except UnicodeEncodeError:
349 return self.filename.encode('utf-8'), self.flag_bits | 0x800
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000350
351 def _decodeExtra(self):
352 # Try to decode the extra field.
353 extra = self.extra
354 unpack = struct.unpack
355 while extra:
Christian Heimesd5e2b6f2008-03-19 21:50:51 +0000356 tp, ln = unpack('<HH', extra[:4])
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000357 if tp == 1:
358 if ln >= 24:
Christian Heimesd5e2b6f2008-03-19 21:50:51 +0000359 counts = unpack('<QQQ', extra[4:28])
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000360 elif ln == 16:
Christian Heimesd5e2b6f2008-03-19 21:50:51 +0000361 counts = unpack('<QQ', extra[4:20])
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000362 elif ln == 8:
Christian Heimesd5e2b6f2008-03-19 21:50:51 +0000363 counts = unpack('<Q', extra[4:12])
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000364 elif ln == 0:
365 counts = ()
366 else:
Collin Winterce36ad82007-08-30 01:19:48 +0000367 raise RuntimeError("Corrupt extra field %s"%(ln,))
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000368
369 idx = 0
370
371 # ZIP64 extension (large files and/or large archives)
Christian Heimesd5e2b6f2008-03-19 21:50:51 +0000372 if self.file_size in (0xffffffffffffffff, 0xffffffff):
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000373 self.file_size = counts[idx]
374 idx += 1
375
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000376 if self.compress_size == 0xFFFFFFFF:
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000377 self.compress_size = counts[idx]
378 idx += 1
379
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000380 if self.header_offset == 0xffffffff:
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000381 old = self.header_offset
382 self.header_offset = counts[idx]
383 idx+=1
384
385 extra = extra[ln+4:]
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000386
387
Thomas Wouterscf297e42007-02-23 15:07:44 +0000388class _ZipDecrypter:
389 """Class to handle decryption of files stored within a ZIP archive.
390
391 ZIP supports a password-based form of encryption. Even though known
392 plaintext attacks have been found against it, it is still useful
Christian Heimesfdab48e2008-01-20 09:06:41 +0000393 to be able to get data out of such a file.
Thomas Wouterscf297e42007-02-23 15:07:44 +0000394
395 Usage:
396 zd = _ZipDecrypter(mypwd)
397 plain_char = zd(cypher_char)
398 plain_text = map(zd, cypher_text)
399 """
400
401 def _GenerateCRCTable():
402 """Generate a CRC-32 table.
403
404 ZIP encryption uses the CRC32 one-byte primitive for scrambling some
405 internal keys. We noticed that a direct implementation is faster than
406 relying on binascii.crc32().
407 """
408 poly = 0xedb88320
409 table = [0] * 256
410 for i in range(256):
411 crc = i
412 for j in range(8):
413 if crc & 1:
414 crc = ((crc >> 1) & 0x7FFFFFFF) ^ poly
415 else:
416 crc = ((crc >> 1) & 0x7FFFFFFF)
417 table[i] = crc
418 return table
419 crctable = _GenerateCRCTable()
420
421 def _crc32(self, ch, crc):
422 """Compute the CRC32 primitive on one byte."""
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000423 return ((crc >> 8) & 0xffffff) ^ self.crctable[(crc ^ ch) & 0xff]
Thomas Wouterscf297e42007-02-23 15:07:44 +0000424
425 def __init__(self, pwd):
426 self.key0 = 305419896
427 self.key1 = 591751049
428 self.key2 = 878082192
429 for p in pwd:
430 self._UpdateKeys(p)
431
432 def _UpdateKeys(self, c):
433 self.key0 = self._crc32(c, self.key0)
434 self.key1 = (self.key1 + (self.key0 & 255)) & 4294967295
435 self.key1 = (self.key1 * 134775813 + 1) & 4294967295
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000436 self.key2 = self._crc32((self.key1 >> 24) & 255, self.key2)
Thomas Wouterscf297e42007-02-23 15:07:44 +0000437
438 def __call__(self, c):
439 """Decrypt a single character."""
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000440 assert isinstance(c, int)
Thomas Wouterscf297e42007-02-23 15:07:44 +0000441 k = self.key2 | 2
442 c = c ^ (((k * (k^1)) >> 8) & 255)
Thomas Wouterscf297e42007-02-23 15:07:44 +0000443 self._UpdateKeys(c)
444 return c
445
Guido van Rossumd8faa362007-04-27 19:54:29 +0000446class ZipExtFile:
447 """File-like object for reading an archive member.
448 Is returned by ZipFile.open().
449 """
450
451 def __init__(self, fileobj, zipinfo, decrypt=None):
452 self.fileobj = fileobj
453 self.decrypter = decrypt
454 self.bytes_read = 0
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000455 self.rawbuffer = b''
456 self.readbuffer = b''
457 self.linebuffer = b''
Guido van Rossumd8faa362007-04-27 19:54:29 +0000458 self.eof = False
459 self.univ_newlines = False
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000460 self.nlSeps = (b"\n", )
461 self.lastdiscard = b''
Guido van Rossumd8faa362007-04-27 19:54:29 +0000462
463 self.compress_type = zipinfo.compress_type
464 self.compress_size = zipinfo.compress_size
465
466 self.closed = False
467 self.mode = "r"
468 self.name = zipinfo.filename
469
470 # read from compressed files in 64k blocks
471 self.compreadsize = 64*1024
472 if self.compress_type == ZIP_DEFLATED:
473 self.dc = zlib.decompressobj(-15)
474
475 def set_univ_newlines(self, univ_newlines):
476 self.univ_newlines = univ_newlines
477
478 # pick line separator char(s) based on universal newlines flag
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000479 self.nlSeps = (b"\n", )
Guido van Rossumd8faa362007-04-27 19:54:29 +0000480 if self.univ_newlines:
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000481 self.nlSeps = (b"\r\n", b"\r", b"\n")
Guido van Rossumd8faa362007-04-27 19:54:29 +0000482
483 def __iter__(self):
484 return self
485
486 def __next__(self):
487 nextline = self.readline()
488 if not nextline:
489 raise StopIteration()
490
491 return nextline
492
493 def close(self):
494 self.closed = True
495
496 def _checkfornewline(self):
497 nl, nllen = -1, -1
498 if self.linebuffer:
499 # ugly check for cases where half of an \r\n pair was
500 # read on the last pass, and the \r was discarded. In this
501 # case we just throw away the \n at the start of the buffer.
Guido van Rossum814661e2007-07-18 22:07:29 +0000502 if (self.lastdiscard, self.linebuffer[:1]) == (b'\r', b'\n'):
Guido van Rossumd8faa362007-04-27 19:54:29 +0000503 self.linebuffer = self.linebuffer[1:]
504
505 for sep in self.nlSeps:
506 nl = self.linebuffer.find(sep)
507 if nl >= 0:
508 nllen = len(sep)
509 return nl, nllen
510
511 return nl, nllen
512
513 def readline(self, size = -1):
514 """Read a line with approx. size. If size is negative,
515 read a whole line.
516 """
517 if size < 0:
Christian Heimesa37d4c62007-12-04 23:02:19 +0000518 size = sys.maxsize
Guido van Rossumd8faa362007-04-27 19:54:29 +0000519 elif size == 0:
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000520 return b''
Guido van Rossumd8faa362007-04-27 19:54:29 +0000521
522 # check for a newline already in buffer
523 nl, nllen = self._checkfornewline()
524
525 if nl >= 0:
526 # the next line was already in the buffer
527 nl = min(nl, size)
528 else:
529 # no line break in buffer - try to read more
530 size -= len(self.linebuffer)
531 while nl < 0 and size > 0:
532 buf = self.read(min(size, 100))
533 if not buf:
534 break
535 self.linebuffer += buf
536 size -= len(buf)
537
538 # check for a newline in buffer
539 nl, nllen = self._checkfornewline()
540
541 # we either ran out of bytes in the file, or
542 # met the specified size limit without finding a newline,
543 # so return current buffer
544 if nl < 0:
545 s = self.linebuffer
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000546 self.linebuffer = b''
Guido van Rossumd8faa362007-04-27 19:54:29 +0000547 return s
548
549 buf = self.linebuffer[:nl]
550 self.lastdiscard = self.linebuffer[nl:nl + nllen]
551 self.linebuffer = self.linebuffer[nl + nllen:]
552
553 # line is always returned with \n as newline char (except possibly
554 # for a final incomplete line in the file, which is handled above).
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000555 return buf + b"\n"
Guido van Rossumd8faa362007-04-27 19:54:29 +0000556
557 def readlines(self, sizehint = -1):
558 """Return a list with all (following) lines. The sizehint parameter
559 is ignored in this implementation.
560 """
561 result = []
562 while True:
563 line = self.readline()
564 if not line: break
565 result.append(line)
566 return result
567
568 def read(self, size = None):
Guido van Rossum814661e2007-07-18 22:07:29 +0000569 # act like file obj and return empty string if size is 0
Guido van Rossumd8faa362007-04-27 19:54:29 +0000570 if size == 0:
Guido van Rossum814661e2007-07-18 22:07:29 +0000571 return b''
Guido van Rossumd8faa362007-04-27 19:54:29 +0000572
573 # determine read size
574 bytesToRead = self.compress_size - self.bytes_read
575
576 # adjust read size for encrypted files since the first 12 bytes
577 # are for the encryption/password information
578 if self.decrypter is not None:
579 bytesToRead -= 12
580
581 if size is not None and size >= 0:
582 if self.compress_type == ZIP_STORED:
583 lr = len(self.readbuffer)
584 bytesToRead = min(bytesToRead, size - lr)
585 elif self.compress_type == ZIP_DEFLATED:
586 if len(self.readbuffer) > size:
587 # the user has requested fewer bytes than we've already
588 # pulled through the decompressor; don't read any more
589 bytesToRead = 0
590 else:
591 # user will use up the buffer, so read some more
592 lr = len(self.rawbuffer)
593 bytesToRead = min(bytesToRead, self.compreadsize - lr)
594
595 # avoid reading past end of file contents
596 if bytesToRead + self.bytes_read > self.compress_size:
597 bytesToRead = self.compress_size - self.bytes_read
598
599 # try to read from file (if necessary)
600 if bytesToRead > 0:
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000601 data = self.fileobj.read(bytesToRead)
602 self.bytes_read += len(data)
603 try:
604 self.rawbuffer += data
605 except:
606 print(repr(self.fileobj), repr(self.rawbuffer),
607 repr(data))
608 raise
Guido van Rossumd8faa362007-04-27 19:54:29 +0000609
610 # handle contents of raw buffer
611 if self.rawbuffer:
612 newdata = self.rawbuffer
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000613 self.rawbuffer = b''
Guido van Rossumd8faa362007-04-27 19:54:29 +0000614
615 # decrypt new data if we were given an object to handle that
616 if newdata and self.decrypter is not None:
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000617 newdata = bytes(map(self.decrypter, newdata))
Guido van Rossumd8faa362007-04-27 19:54:29 +0000618
619 # decompress newly read data if necessary
620 if newdata and self.compress_type == ZIP_DEFLATED:
621 newdata = self.dc.decompress(newdata)
622 self.rawbuffer = self.dc.unconsumed_tail
623 if self.eof and len(self.rawbuffer) == 0:
624 # we're out of raw bytes (both from the file and
625 # the local buffer); flush just to make sure the
626 # decompressor is done
627 newdata += self.dc.flush()
628 # prevent decompressor from being used again
629 self.dc = None
630
631 self.readbuffer += newdata
632
633
634 # return what the user asked for
635 if size is None or len(self.readbuffer) <= size:
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000636 data = self.readbuffer
637 self.readbuffer = b''
Guido van Rossumd8faa362007-04-27 19:54:29 +0000638 else:
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000639 data = self.readbuffer[:size]
Guido van Rossumd8faa362007-04-27 19:54:29 +0000640 self.readbuffer = self.readbuffer[size:]
641
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000642 return data
Guido van Rossumd8faa362007-04-27 19:54:29 +0000643
644
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000645class ZipFile:
Tim Petersa19a1682001-03-29 04:36:09 +0000646 """ Class with methods to open, read, write, close, list zip files.
647
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000648 z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=False)
Tim Petersa19a1682001-03-29 04:36:09 +0000649
Fred Drake3d9091e2001-03-26 15:49:24 +0000650 file: Either the path to the file, or a file-like object.
651 If it is a path, the file will be opened and closed by ZipFile.
652 mode: The mode can be either read "r", write "w" or append "a".
653 compression: ZIP_STORED (no compression) or ZIP_DEFLATED (requires zlib).
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000654 allowZip64: if True ZipFile will create files with ZIP64 extensions when
655 needed, otherwise it will raise an exception when this would
656 be necessary.
657
Fred Drake3d9091e2001-03-26 15:49:24 +0000658 """
Fred Drake484d7352000-10-02 21:14:52 +0000659
Fred Drake90eac282001-02-28 05:29:34 +0000660 fp = None # Set here since __del__ checks it
661
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000662 def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=False):
Fred Drake484d7352000-10-02 21:14:52 +0000663 """Open the ZIP file with mode read "r", write "w" or append "a"."""
Guido van Rossumb5a755e2007-07-18 18:15:48 +0000664 if mode not in ("r", "w", "a"):
665 raise RuntimeError('ZipFile() requires mode "r", "w", or "a"')
666
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000667 if compression == ZIP_STORED:
668 pass
669 elif compression == ZIP_DEFLATED:
670 if not zlib:
Collin Winterce36ad82007-08-30 01:19:48 +0000671 raise RuntimeError(
672 "Compression requires the (missing) zlib module")
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000673 else:
Collin Winterce36ad82007-08-30 01:19:48 +0000674 raise RuntimeError("That compression method is not supported")
Guido van Rossumb5a755e2007-07-18 18:15:48 +0000675
676 self._allowZip64 = allowZip64
677 self._didModify = False
Tim Peterse1190062001-01-15 03:34:38 +0000678 self.debug = 0 # Level of printing: 0 through 3
679 self.NameToInfo = {} # Find file info given name
680 self.filelist = [] # List of ZipInfo instances for archive
681 self.compression = compression # Method of compression
Raymond Hettinger2ca7c192005-02-16 09:27:49 +0000682 self.mode = key = mode.replace('b', '')[0]
Thomas Wouterscf297e42007-02-23 15:07:44 +0000683 self.pwd = None
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000684 self.comment = b''
Tim Petersa19a1682001-03-29 04:36:09 +0000685
Fred Drake3d9091e2001-03-26 15:49:24 +0000686 # Check if we were passed a file-like object
Guido van Rossum3172c5d2007-10-16 18:12:55 +0000687 if isinstance(file, str):
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000688 # No, it's a filename
Fred Drake3d9091e2001-03-26 15:49:24 +0000689 self._filePassed = 0
690 self.filename = file
691 modeDict = {'r' : 'rb', 'w': 'wb', 'a' : 'r+b'}
Thomas Wouterscf297e42007-02-23 15:07:44 +0000692 try:
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000693 self.fp = io.open(file, modeDict[mode])
Thomas Wouterscf297e42007-02-23 15:07:44 +0000694 except IOError:
695 if mode == 'a':
696 mode = key = 'w'
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000697 self.fp = io.open(file, modeDict[mode])
Thomas Wouterscf297e42007-02-23 15:07:44 +0000698 else:
699 raise
Fred Drake3d9091e2001-03-26 15:49:24 +0000700 else:
701 self._filePassed = 1
702 self.fp = file
703 self.filename = getattr(file, 'name', None)
Tim Petersa19a1682001-03-29 04:36:09 +0000704
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000705 if key == 'r':
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000706 self._GetContents()
707 elif key == 'w':
Fred Drake3d9091e2001-03-26 15:49:24 +0000708 pass
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000709 elif key == 'a':
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000710 try: # See if file is a zip file
711 self._RealGetContents()
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000712 # seek to start of directory and overwrite
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000713 self.fp.seek(self.start_dir, 0)
714 except BadZipfile: # file is not a zip file, just append
715 self.fp.seek(0, 2)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000716 else:
Tim Peters7d3bad62001-04-04 18:56:49 +0000717 if not self._filePassed:
718 self.fp.close()
719 self.fp = None
Collin Winterce36ad82007-08-30 01:19:48 +0000720 raise RuntimeError('Mode must be "r", "w" or "a"')
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000721
722 def _GetContents(self):
Tim Peters7d3bad62001-04-04 18:56:49 +0000723 """Read the directory, making sure we close the file if the format
724 is bad."""
725 try:
726 self._RealGetContents()
727 except BadZipfile:
728 if not self._filePassed:
729 self.fp.close()
730 self.fp = None
731 raise
732
733 def _RealGetContents(self):
Fred Drake484d7352000-10-02 21:14:52 +0000734 """Read in the table of contents for the ZIP file."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000735 fp = self.fp
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000736 endrec = _EndRecData(fp)
737 if not endrec:
Collin Winterce36ad82007-08-30 01:19:48 +0000738 raise BadZipfile("File is not a zip file")
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000739 if self.debug > 1:
Guido van Rossumbe19ed72007-02-09 05:37:30 +0000740 print(endrec)
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000741 size_cd = endrec[_ECD_SIZE] # bytes in central directory
742 offset_cd = endrec[_ECD_OFFSET] # offset of central directory
743 self.comment = endrec[_ECD_COMMENT] # archive comment
744
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000745 # "concat" is zero, unless zip was concatenated to another file
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000746 concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
Antoine Pitrou9e4fdf42008-09-05 23:43:02 +0000747 if endrec[_ECD_SIGNATURE] == stringEndArchive64:
748 # If Zip64 extension structures are present, account for them
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000749 concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)
750
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000751 if self.debug > 2:
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000752 inferred = concat + offset_cd
753 print("given, inferred, offset", offset_cd, inferred, concat)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000754 # self.start_dir: Position of start of central directory
755 self.start_dir = offset_cd + concat
756 fp.seek(self.start_dir, 0)
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000757 data = fp.read(size_cd)
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000758 fp = io.BytesIO(data)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000759 total = 0
760 while total < size_cd:
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000761 centdir = fp.read(sizeCentralDir)
Georg Brandl2ee470f2008-07-16 12:55:28 +0000762 if centdir[0:4] != stringCentralDir:
Collin Winterce36ad82007-08-30 01:19:48 +0000763 raise BadZipfile("Bad magic number for central directory")
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000764 centdir = struct.unpack(structCentralDir, centdir)
765 if self.debug > 2:
Guido van Rossumbe19ed72007-02-09 05:37:30 +0000766 print(centdir)
Fred Drake3e038e52001-02-28 17:56:26 +0000767 filename = fp.read(centdir[_CD_FILENAME_LENGTH])
Martin v. Löwis8570f6a2008-05-05 17:44:38 +0000768 flags = centdir[5]
769 if flags & 0x800:
770 # UTF-8 file names extension
771 filename = filename.decode('utf-8')
772 else:
773 # Historical ZIP filename encoding
774 filename = filename.decode('cp437')
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000775 # Create ZipInfo instance to store file information
Martin v. Löwis8570f6a2008-05-05 17:44:38 +0000776 x = ZipInfo(filename)
Fred Drake3e038e52001-02-28 17:56:26 +0000777 x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
778 x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000779 x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000780 (x.create_version, x.create_system, x.extract_version, x.reserved,
781 x.flag_bits, x.compress_type, t, d,
782 x.CRC, x.compress_size, x.file_size) = centdir[1:12]
783 x.volume, x.internal_attr, x.external_attr = centdir[15:18]
784 # Convert date/time code to (year, month, day, hour, min, sec)
Christian Heimesfdab48e2008-01-20 09:06:41 +0000785 x._raw_time = t
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000786 x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
Fred Drake414ca662000-06-13 18:49:53 +0000787 t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000788
789 x._decodeExtra()
790 x.header_offset = x.header_offset + concat
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000791 self.filelist.append(x)
792 self.NameToInfo[x.filename] = x
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000793
794 # update total bytes read from central directory
795 total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH]
796 + centdir[_CD_EXTRA_FIELD_LENGTH]
797 + centdir[_CD_COMMENT_LENGTH])
798
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000799 if self.debug > 2:
Guido van Rossumbe19ed72007-02-09 05:37:30 +0000800 print("total", total)
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000801
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000802
803 def namelist(self):
Fred Drake484d7352000-10-02 21:14:52 +0000804 """Return a list of file names in the archive."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000805 l = []
806 for data in self.filelist:
807 l.append(data.filename)
808 return l
809
810 def infolist(self):
Fred Drake484d7352000-10-02 21:14:52 +0000811 """Return a list of class ZipInfo instances for files in the
812 archive."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000813 return self.filelist
814
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000815 def printdir(self, file=None):
Fred Drake484d7352000-10-02 21:14:52 +0000816 """Print a table of contents for the zip file."""
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000817 print("%-46s %19s %12s" % ("File Name", "Modified ", "Size"),
818 file=file)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000819 for zinfo in self.filelist:
Guido van Rossum7736b5b2008-01-15 21:44:53 +0000820 date = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time[:6]
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000821 print("%-46s %s %12d" % (zinfo.filename, date, zinfo.file_size),
822 file=file)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000823
824 def testzip(self):
Fred Drake484d7352000-10-02 21:14:52 +0000825 """Read all the files and check the CRC."""
Benjamin Peterson4cd6a952008-08-17 20:23:46 +0000826 chunk_size = 2 ** 20
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000827 for zinfo in self.filelist:
828 try:
Benjamin Peterson4cd6a952008-08-17 20:23:46 +0000829 # Read by chunks, to avoid an OverflowError or a
830 # MemoryError with very large embedded files.
831 f = self.open(zinfo.filename, "r")
832 while f.read(chunk_size): # Check CRC-32
833 pass
Raymond Hettingerc0fac962003-06-27 22:25:03 +0000834 except BadZipfile:
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000835 return zinfo.filename
836
837 def getinfo(self, name):
Fred Drake484d7352000-10-02 21:14:52 +0000838 """Return the instance of ZipInfo given 'name'."""
Guido van Rossumb5a755e2007-07-18 18:15:48 +0000839 info = self.NameToInfo.get(name)
840 if info is None:
841 raise KeyError(
842 'There is no item named %r in the archive' % name)
843
844 return info
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000845
Thomas Wouterscf297e42007-02-23 15:07:44 +0000846 def setpassword(self, pwd):
847 """Set default password for encrypted files."""
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000848 assert isinstance(pwd, bytes)
Thomas Wouterscf297e42007-02-23 15:07:44 +0000849 self.pwd = pwd
850
851 def read(self, name, pwd=None):
Fred Drake484d7352000-10-02 21:14:52 +0000852 """Return file bytes (as a string) for name."""
Guido van Rossumd8faa362007-04-27 19:54:29 +0000853 return self.open(name, "r", pwd).read()
854
855 def open(self, name, mode="r", pwd=None):
856 """Return file-like object for 'name'."""
857 if mode not in ("r", "U", "rU"):
Collin Winterce36ad82007-08-30 01:19:48 +0000858 raise RuntimeError('open() requires mode "r", "U", or "rU"')
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000859 if not self.fp:
Collin Winterce36ad82007-08-30 01:19:48 +0000860 raise RuntimeError(
861 "Attempt to read ZIP archive that was already closed")
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000862
Guido van Rossumd8faa362007-04-27 19:54:29 +0000863 # Only open a new file for instances where we were not
864 # given a file object in the constructor
865 if self._filePassed:
866 zef_file = self.fp
867 else:
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000868 zef_file = io.open(self.filename, 'rb')
Guido van Rossumd8faa362007-04-27 19:54:29 +0000869
Georg Brandlb533e262008-05-25 18:19:30 +0000870 # Make sure we have an info object
871 if isinstance(name, ZipInfo):
872 # 'name' is already an info object
873 zinfo = name
874 else:
875 # Get info object for name
876 zinfo = self.getinfo(name)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000877
878 zef_file.seek(zinfo.header_offset, 0)
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000879
880 # Skip the file header:
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000881 fheader = zef_file.read(sizeFileHeader)
Georg Brandl2ee470f2008-07-16 12:55:28 +0000882 if fheader[0:4] != stringFileHeader:
Collin Winterce36ad82007-08-30 01:19:48 +0000883 raise BadZipfile("Bad magic number for file header")
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000884
885 fheader = struct.unpack(structFileHeader, fheader)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000886 fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000887 if fheader[_FH_EXTRA_FIELD_LENGTH]:
Guido van Rossumd8faa362007-04-27 19:54:29 +0000888 zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000889
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000890 if fname != zinfo.orig_filename.encode("utf-8"):
Collin Winterce36ad82007-08-30 01:19:48 +0000891 raise BadZipfile(
892 'File name in directory %r and header %r differ.'
893 % (zinfo.orig_filename, fname))
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000894
Guido van Rossumd8faa362007-04-27 19:54:29 +0000895 # check for encrypted flag & handle password
896 is_encrypted = zinfo.flag_bits & 0x1
897 zd = None
Thomas Wouterscf297e42007-02-23 15:07:44 +0000898 if is_encrypted:
Guido van Rossumd8faa362007-04-27 19:54:29 +0000899 if not pwd:
900 pwd = self.pwd
901 if not pwd:
Collin Winterce36ad82007-08-30 01:19:48 +0000902 raise RuntimeError("File %s is encrypted, "
903 "password required for extraction" % name)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000904
Thomas Wouterscf297e42007-02-23 15:07:44 +0000905 zd = _ZipDecrypter(pwd)
906 # The first 12 bytes in the cypher stream is an encryption header
907 # used to strengthen the algorithm. The first 11 bytes are
908 # completely random, while the 12th contains the MSB of the CRC,
Christian Heimesfdab48e2008-01-20 09:06:41 +0000909 # or the MSB of the file time depending on the header type
Thomas Wouterscf297e42007-02-23 15:07:44 +0000910 # and is used to check the correctness of the password.
Guido van Rossumd8faa362007-04-27 19:54:29 +0000911 bytes = zef_file.read(12)
Guido van Rossumc1f779c2007-07-03 08:25:58 +0000912 h = list(map(zd, bytes[0:12]))
Christian Heimesfdab48e2008-01-20 09:06:41 +0000913 if zinfo.flag_bits & 0x8:
914 # compare against the file type from extended local headers
915 check_byte = (zinfo._raw_time >> 8) & 0xff
916 else:
917 # compare against the CRC otherwise
918 check_byte = (zinfo.CRC >> 24) & 0xff
919 if h[11] != check_byte:
920 raise RuntimeError("Bad password for file", name)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000921
922 # build and return a ZipExtFile
923 if zd is None:
924 zef = ZipExtFile(zef_file, zinfo)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000925 else:
Guido van Rossumd8faa362007-04-27 19:54:29 +0000926 zef = ZipExtFile(zef_file, zinfo, zd)
927
928 # set universal newlines on ZipExtFile if necessary
929 if "U" in mode:
930 zef.set_univ_newlines(True)
931 return zef
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000932
Christian Heimes790c8232008-01-07 21:14:23 +0000933 def extract(self, member, path=None, pwd=None):
934 """Extract a member from the archive to the current working directory,
935 using its full name. Its file information is extracted as accurately
936 as possible. `member' may be a filename or a ZipInfo object. You can
937 specify a different directory using `path'.
938 """
939 if not isinstance(member, ZipInfo):
940 member = self.getinfo(member)
941
942 if path is None:
943 path = os.getcwd()
944
945 return self._extract_member(member, path, pwd)
946
947 def extractall(self, path=None, members=None, pwd=None):
948 """Extract all members from the archive to the current working
949 directory. `path' specifies a different directory to extract to.
950 `members' is optional and must be a subset of the list returned
951 by namelist().
952 """
953 if members is None:
954 members = self.namelist()
955
956 for zipinfo in members:
957 self.extract(zipinfo, path, pwd)
958
959 def _extract_member(self, member, targetpath, pwd):
960 """Extract the ZipInfo object 'member' to a physical
961 file on the path targetpath.
962 """
963 # build the destination pathname, replacing
964 # forward slashes to platform specific separators.
Antoine Pitrou3c33e082009-05-04 21:21:36 +0000965 # Strip trailing path separator, unless it represents the root.
966 if (targetpath[-1:] in (os.path.sep, os.path.altsep)
967 and len(os.path.splitdrive(targetpath)[1]) > 1):
Christian Heimes790c8232008-01-07 21:14:23 +0000968 targetpath = targetpath[:-1]
969
970 # don't include leading "/" from file name if present
Martin v. Löwis59e47792009-01-24 14:10:07 +0000971 if member.filename[0] == '/':
Christian Heimes790c8232008-01-07 21:14:23 +0000972 targetpath = os.path.join(targetpath, member.filename[1:])
973 else:
974 targetpath = os.path.join(targetpath, member.filename)
975
976 targetpath = os.path.normpath(targetpath)
977
978 # Create all upper directories if necessary.
979 upperdirs = os.path.dirname(targetpath)
980 if upperdirs and not os.path.exists(upperdirs):
981 os.makedirs(upperdirs)
982
Martin v. Löwis59e47792009-01-24 14:10:07 +0000983 if member.filename[-1] == '/':
Martin v. Löwis70ccd162009-05-24 19:47:22 +0000984 if not os.path.isdir(targetpath):
985 os.mkdir(targetpath)
Martin v. Löwis59e47792009-01-24 14:10:07 +0000986 return targetpath
987
Georg Brandlb533e262008-05-25 18:19:30 +0000988 source = self.open(member, pwd=pwd)
Christian Heimes790c8232008-01-07 21:14:23 +0000989 target = open(targetpath, "wb")
990 shutil.copyfileobj(source, target)
991 source.close()
992 target.close()
993
994 return targetpath
995
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000996 def _writecheck(self, zinfo):
Fred Drake484d7352000-10-02 21:14:52 +0000997 """Check for errors before writing a file to the archive."""
Raymond Hettinger54f02222002-06-01 14:18:47 +0000998 if zinfo.filename in self.NameToInfo:
Tim Peterse1190062001-01-15 03:34:38 +0000999 if self.debug: # Warning for duplicate names
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001000 print("Duplicate name:", zinfo.filename)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001001 if self.mode not in ("w", "a"):
Collin Winterce36ad82007-08-30 01:19:48 +00001002 raise RuntimeError('write() requires mode "w" or "a"')
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001003 if not self.fp:
Collin Winterce36ad82007-08-30 01:19:48 +00001004 raise RuntimeError(
1005 "Attempt to write ZIP archive that was already closed")
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001006 if zinfo.compress_type == ZIP_DEFLATED and not zlib:
Collin Winterce36ad82007-08-30 01:19:48 +00001007 raise RuntimeError(
1008 "Compression requires the (missing) zlib module")
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001009 if zinfo.compress_type not in (ZIP_STORED, ZIP_DEFLATED):
Collin Winterce36ad82007-08-30 01:19:48 +00001010 raise RuntimeError("That compression method is not supported")
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001011 if zinfo.file_size > ZIP64_LIMIT:
1012 if not self._allowZip64:
1013 raise LargeZipFile("Filesize would require ZIP64 extensions")
1014 if zinfo.header_offset > ZIP64_LIMIT:
1015 if not self._allowZip64:
Collin Winterce36ad82007-08-30 01:19:48 +00001016 raise LargeZipFile(
1017 "Zipfile size would require ZIP64 extensions")
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001018
1019 def write(self, filename, arcname=None, compress_type=None):
Fred Drake484d7352000-10-02 21:14:52 +00001020 """Put the bytes from filename into the archive under the name
1021 arcname."""
Guido van Rossumb5a755e2007-07-18 18:15:48 +00001022 if not self.fp:
1023 raise RuntimeError(
1024 "Attempt to write to ZIP archive that was already closed")
1025
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001026 st = os.stat(filename)
Martin v. Löwis59e47792009-01-24 14:10:07 +00001027 isdir = stat.S_ISDIR(st.st_mode)
Raymond Hettinger32200ae2002-06-01 19:51:15 +00001028 mtime = time.localtime(st.st_mtime)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001029 date_time = mtime[0:6]
1030 # Create ZipInfo instance to store file information
1031 if arcname is None:
Georg Brandl8f7c54e2006-02-20 08:40:38 +00001032 arcname = filename
1033 arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
1034 while arcname[0] in (os.sep, os.altsep):
1035 arcname = arcname[1:]
Martin v. Löwis59e47792009-01-24 14:10:07 +00001036 if isdir:
1037 arcname += '/'
Georg Brandl8f7c54e2006-02-20 08:40:38 +00001038 zinfo = ZipInfo(arcname, date_time)
Guido van Rossume2a383d2007-01-15 16:59:06 +00001039 zinfo.external_attr = (st[0] & 0xFFFF) << 16 # Unix attributes
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001040 if compress_type is None:
Tim Peterse1190062001-01-15 03:34:38 +00001041 zinfo.compress_type = self.compression
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001042 else:
Tim Peterse1190062001-01-15 03:34:38 +00001043 zinfo.compress_type = compress_type
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001044
1045 zinfo.file_size = st.st_size
Finn Bock03a3bb82001-09-05 18:40:33 +00001046 zinfo.flag_bits = 0x00
Tim Peterse1190062001-01-15 03:34:38 +00001047 zinfo.header_offset = self.fp.tell() # Start of header bytes
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001048
1049 self._writecheck(zinfo)
1050 self._didModify = True
Martin v. Löwis59e47792009-01-24 14:10:07 +00001051
1052 if isdir:
1053 zinfo.file_size = 0
1054 zinfo.compress_size = 0
1055 zinfo.CRC = 0
1056 self.filelist.append(zinfo)
1057 self.NameToInfo[zinfo.filename] = zinfo
1058 self.fp.write(zinfo.FileHeader())
1059 return
1060
Benjamin Petersonfa0d7032009-06-01 22:42:33 +00001061 with open(filename, "rb") as fp:
1062 # Must overwrite CRC and sizes with correct data later
1063 zinfo.CRC = CRC = 0
1064 zinfo.compress_size = compress_size = 0
1065 zinfo.file_size = file_size = 0
1066 self.fp.write(zinfo.FileHeader())
1067 if zinfo.compress_type == ZIP_DEFLATED:
1068 cmpr = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
1069 zlib.DEFLATED, -15)
1070 else:
1071 cmpr = None
1072 while 1:
1073 buf = fp.read(1024 * 8)
1074 if not buf:
1075 break
1076 file_size = file_size + len(buf)
1077 CRC = crc32(buf, CRC) & 0xffffffff
1078 if cmpr:
1079 buf = cmpr.compress(buf)
1080 compress_size = compress_size + len(buf)
1081 self.fp.write(buf)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001082 if cmpr:
1083 buf = cmpr.flush()
1084 compress_size = compress_size + len(buf)
1085 self.fp.write(buf)
1086 zinfo.compress_size = compress_size
1087 else:
1088 zinfo.compress_size = file_size
1089 zinfo.CRC = CRC
1090 zinfo.file_size = file_size
Finn Bock03a3bb82001-09-05 18:40:33 +00001091 # Seek backwards and write CRC and file sizes
Tim Petersb64bec32001-09-18 02:26:39 +00001092 position = self.fp.tell() # Preserve current position in file
Finn Bock03a3bb82001-09-05 18:40:33 +00001093 self.fp.seek(zinfo.header_offset + 14, 0)
Christian Heimesd5e2b6f2008-03-19 21:50:51 +00001094 self.fp.write(struct.pack("<LLL", zinfo.CRC, zinfo.compress_size,
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001095 zinfo.file_size))
Finn Bock03a3bb82001-09-05 18:40:33 +00001096 self.fp.seek(position, 0)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001097 self.filelist.append(zinfo)
1098 self.NameToInfo[zinfo.filename] = zinfo
1099
Guido van Rossum85825dc2007-08-27 17:03:28 +00001100 def writestr(self, zinfo_or_arcname, data):
1101 """Write a file into the archive. The contents is 'data', which
1102 may be either a 'str' or a 'bytes' instance; if it is a 'str',
1103 it is encoded as UTF-8 first.
1104 'zinfo_or_arcname' is either a ZipInfo instance or
Just van Rossumb083cb32002-12-12 12:23:32 +00001105 the name of the file in the archive."""
Guido van Rossum85825dc2007-08-27 17:03:28 +00001106 if isinstance(data, str):
1107 data = data.encode("utf-8")
Just van Rossumb083cb32002-12-12 12:23:32 +00001108 if not isinstance(zinfo_or_arcname, ZipInfo):
1109 zinfo = ZipInfo(filename=zinfo_or_arcname,
Guido van Rossum7736b5b2008-01-15 21:44:53 +00001110 date_time=time.localtime(time.time())[:6])
Just van Rossumb083cb32002-12-12 12:23:32 +00001111 zinfo.compress_type = self.compression
Antoine Pitrou6e1df8d2008-07-25 19:58:18 +00001112 zinfo.external_attr = 0o600 << 16
Just van Rossumb083cb32002-12-12 12:23:32 +00001113 else:
1114 zinfo = zinfo_or_arcname
Guido van Rossumb5a755e2007-07-18 18:15:48 +00001115
1116 if not self.fp:
1117 raise RuntimeError(
1118 "Attempt to write to ZIP archive that was already closed")
1119
Guido van Rossum85825dc2007-08-27 17:03:28 +00001120 zinfo.file_size = len(data) # Uncompressed size
1121 zinfo.header_offset = self.fp.tell() # Start of header data
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001122 self._writecheck(zinfo)
1123 self._didModify = True
Christian Heimesd5e2b6f2008-03-19 21:50:51 +00001124 zinfo.CRC = crc32(data) & 0xffffffff # CRC-32 checksum
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001125 if zinfo.compress_type == ZIP_DEFLATED:
1126 co = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
1127 zlib.DEFLATED, -15)
Guido van Rossum85825dc2007-08-27 17:03:28 +00001128 data = co.compress(data) + co.flush()
1129 zinfo.compress_size = len(data) # Compressed size
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001130 else:
1131 zinfo.compress_size = zinfo.file_size
Guido van Rossum85825dc2007-08-27 17:03:28 +00001132 zinfo.header_offset = self.fp.tell() # Start of header data
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001133 self.fp.write(zinfo.FileHeader())
Guido van Rossum85825dc2007-08-27 17:03:28 +00001134 self.fp.write(data)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001135 self.fp.flush()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001136 if zinfo.flag_bits & 0x08:
Tim Peterse1190062001-01-15 03:34:38 +00001137 # Write CRC and file sizes after the file data
Gregory P. Smithe88749b2009-06-26 08:05:13 +00001138 self.fp.write(struct.pack("<LLL", zinfo.CRC, zinfo.compress_size,
Tim Peterse1190062001-01-15 03:34:38 +00001139 zinfo.file_size))
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001140 self.filelist.append(zinfo)
1141 self.NameToInfo[zinfo.filename] = zinfo
1142
1143 def __del__(self):
Fred Drake484d7352000-10-02 21:14:52 +00001144 """Call the "close()" method in case the user forgot."""
Tim Petersd15f8bb2001-11-28 23:16:40 +00001145 self.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001146
1147 def close(self):
Fred Drake484d7352000-10-02 21:14:52 +00001148 """Close the file, and for mode "w" and "a" write the ending
1149 records."""
Tim Petersd15f8bb2001-11-28 23:16:40 +00001150 if self.fp is None:
1151 return
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001152
1153 if self.mode in ("w", "a") and self._didModify: # write ending records
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001154 count = 0
1155 pos1 = self.fp.tell()
Tim Peterse1190062001-01-15 03:34:38 +00001156 for zinfo in self.filelist: # write central directory
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001157 count = count + 1
1158 dt = zinfo.date_time
1159 dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
Tim Peters3caca232001-12-06 06:23:26 +00001160 dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001161 extra = []
1162 if zinfo.file_size > ZIP64_LIMIT \
1163 or zinfo.compress_size > ZIP64_LIMIT:
1164 extra.append(zinfo.file_size)
1165 extra.append(zinfo.compress_size)
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001166 file_size = 0xffffffff
1167 compress_size = 0xffffffff
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001168 else:
1169 file_size = zinfo.file_size
1170 compress_size = zinfo.compress_size
1171
1172 if zinfo.header_offset > ZIP64_LIMIT:
1173 extra.append(zinfo.header_offset)
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001174 header_offset = 0xffffffff
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001175 else:
1176 header_offset = zinfo.header_offset
1177
1178 extra_data = zinfo.extra
1179 if extra:
1180 # Append a ZIP64 field to the extra's
1181 extra_data = struct.pack(
Christian Heimesd5e2b6f2008-03-19 21:50:51 +00001182 '<HH' + 'Q'*len(extra),
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001183 1, 8*len(extra), *extra) + extra_data
1184
1185 extract_version = max(45, zinfo.extract_version)
1186 create_version = max(45, zinfo.create_version)
1187 else:
1188 extract_version = zinfo.extract_version
1189 create_version = zinfo.create_version
1190
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001191 try:
1192 filename, flag_bits = zinfo._encodeFilenameFlags()
1193 centdir = struct.pack(structCentralDir,
Georg Brandl2ee470f2008-07-16 12:55:28 +00001194 stringCentralDir, create_version,
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001195 zinfo.create_system, extract_version, zinfo.reserved,
1196 flag_bits, zinfo.compress_type, dostime, dosdate,
1197 zinfo.CRC, compress_size, file_size,
1198 len(filename), len(extra_data), len(zinfo.comment),
1199 0, zinfo.internal_attr, zinfo.external_attr,
1200 header_offset)
1201 except DeprecationWarning:
1202 print >>sys.stderr, (structCentralDir,
1203 stringCentralDir, create_version,
1204 zinfo.create_system, extract_version, zinfo.reserved,
1205 zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
1206 zinfo.CRC, compress_size, file_size,
1207 len(zinfo.filename), len(extra_data), len(zinfo.comment),
1208 0, zinfo.internal_attr, zinfo.external_attr,
1209 header_offset)
1210 raise
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001211 self.fp.write(centdir)
Martin v. Löwis8570f6a2008-05-05 17:44:38 +00001212 self.fp.write(filename)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001213 self.fp.write(extra_data)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001214 self.fp.write(zinfo.comment)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001215
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001216 pos2 = self.fp.tell()
1217 # Write end-of-zip-archive record
Amaury Forgeot d'Arc0c3f8a42009-01-17 16:42:26 +00001218 centDirCount = count
1219 centDirSize = pos2 - pos1
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001220 centDirOffset = pos1
Amaury Forgeot d'Arc0c3f8a42009-01-17 16:42:26 +00001221 if (centDirCount >= ZIP_FILECOUNT_LIMIT or
1222 centDirOffset > ZIP64_LIMIT or
1223 centDirSize > ZIP64_LIMIT):
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001224 # Need to write the ZIP64 end-of-archive records
1225 zip64endrec = struct.pack(
Georg Brandl2ee470f2008-07-16 12:55:28 +00001226 structEndArchive64, stringEndArchive64,
Amaury Forgeot d'Arc0c3f8a42009-01-17 16:42:26 +00001227 44, 45, 45, 0, 0, centDirCount, centDirCount,
1228 centDirSize, centDirOffset)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001229 self.fp.write(zip64endrec)
1230
1231 zip64locrec = struct.pack(
Georg Brandl2ee470f2008-07-16 12:55:28 +00001232 structEndArchive64Locator,
1233 stringEndArchive64Locator, 0, pos2, 1)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001234 self.fp.write(zip64locrec)
Amaury Forgeot d'Arc0c3f8a42009-01-17 16:42:26 +00001235 centDirCount = min(centDirCount, 0xFFFF)
1236 centDirSize = min(centDirSize, 0xFFFFFFFF)
1237 centDirOffset = min(centDirOffset, 0xFFFFFFFF)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001238
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001239 # check for valid comment length
1240 if len(self.comment) >= ZIP_MAX_COMMENT:
1241 if self.debug > 0:
1242 msg = 'Archive comment is too long; truncating to %d bytes' \
1243 % ZIP_MAX_COMMENT
1244 self.comment = self.comment[:ZIP_MAX_COMMENT]
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001245
Georg Brandl2ee470f2008-07-16 12:55:28 +00001246 endrec = struct.pack(structEndArchive, stringEndArchive,
Amaury Forgeot d'Arc0c3f8a42009-01-17 16:42:26 +00001247 0, 0, centDirCount, centDirCount,
1248 centDirSize, centDirOffset, len(self.comment))
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001249 self.fp.write(endrec)
1250 self.fp.write(self.comment)
Guido van Rossumf85af612001-04-14 16:45:14 +00001251 self.fp.flush()
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001252
Fred Drake3d9091e2001-03-26 15:49:24 +00001253 if not self._filePassed:
1254 self.fp.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001255 self.fp = None
1256
1257
1258class PyZipFile(ZipFile):
Fred Drake484d7352000-10-02 21:14:52 +00001259 """Class to create ZIP archives with Python library files and packages."""
1260
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001261 def writepy(self, pathname, basename = ""):
1262 """Add all files from "pathname" to the ZIP archive.
1263
Fred Drake484d7352000-10-02 21:14:52 +00001264 If pathname is a package directory, search the directory and
1265 all package subdirectories recursively for all *.py and enter
1266 the modules into the archive. If pathname is a plain
1267 directory, listdir *.py and enter all modules. Else, pathname
1268 must be a Python *.py file and the module will be put into the
1269 archive. Added modules are always module.pyo or module.pyc.
1270 This method will compile the module.py into module.pyc if
1271 necessary.
1272 """
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001273 dir, name = os.path.split(pathname)
1274 if os.path.isdir(pathname):
1275 initname = os.path.join(pathname, "__init__.py")
1276 if os.path.isfile(initname):
1277 # This is a package directory, add it
1278 if basename:
1279 basename = "%s/%s" % (basename, name)
1280 else:
1281 basename = name
1282 if self.debug:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001283 print("Adding package in", pathname, "as", basename)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001284 fname, arcname = self._get_codename(initname[0:-3], basename)
1285 if self.debug:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001286 print("Adding", arcname)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001287 self.write(fname, arcname)
1288 dirlist = os.listdir(pathname)
1289 dirlist.remove("__init__.py")
1290 # Add all *.py files and package subdirectories
1291 for filename in dirlist:
1292 path = os.path.join(pathname, filename)
1293 root, ext = os.path.splitext(filename)
1294 if os.path.isdir(path):
1295 if os.path.isfile(os.path.join(path, "__init__.py")):
1296 # This is a package directory, add it
1297 self.writepy(path, basename) # Recursive call
1298 elif ext == ".py":
1299 fname, arcname = self._get_codename(path[0:-3],
1300 basename)
1301 if self.debug:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001302 print("Adding", arcname)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001303 self.write(fname, arcname)
1304 else:
1305 # This is NOT a package directory, add its files at top level
1306 if self.debug:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001307 print("Adding files from directory", pathname)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001308 for filename in os.listdir(pathname):
1309 path = os.path.join(pathname, filename)
1310 root, ext = os.path.splitext(filename)
1311 if ext == ".py":
1312 fname, arcname = self._get_codename(path[0:-3],
1313 basename)
1314 if self.debug:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001315 print("Adding", arcname)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001316 self.write(fname, arcname)
1317 else:
1318 if pathname[-3:] != ".py":
Collin Winterce36ad82007-08-30 01:19:48 +00001319 raise RuntimeError(
1320 'Files added with writepy() must end with ".py"')
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001321 fname, arcname = self._get_codename(pathname[0:-3], basename)
1322 if self.debug:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001323 print("Adding file", arcname)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001324 self.write(fname, arcname)
1325
1326 def _get_codename(self, pathname, basename):
1327 """Return (filename, archivename) for the path.
1328
Fred Drake484d7352000-10-02 21:14:52 +00001329 Given a module name path, return the correct file path and
1330 archive name, compiling if necessary. For example, given
1331 /python/lib/string, return (/python/lib/string.pyc, string).
1332 """
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001333 file_py = pathname + ".py"
1334 file_pyc = pathname + ".pyc"
1335 file_pyo = pathname + ".pyo"
1336 if os.path.isfile(file_pyo) and \
Raymond Hettinger32200ae2002-06-01 19:51:15 +00001337 os.stat(file_pyo).st_mtime >= os.stat(file_py).st_mtime:
Tim Peterse1190062001-01-15 03:34:38 +00001338 fname = file_pyo # Use .pyo file
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001339 elif not os.path.isfile(file_pyc) or \
Raymond Hettinger32200ae2002-06-01 19:51:15 +00001340 os.stat(file_pyc).st_mtime < os.stat(file_py).st_mtime:
Fred Drake484d7352000-10-02 21:14:52 +00001341 import py_compile
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001342 if self.debug:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001343 print("Compiling", file_py)
Martin v. Löwis0c6774d2003-01-15 11:51:06 +00001344 try:
1345 py_compile.compile(file_py, file_pyc, None, True)
Guido van Rossumb940e112007-01-10 16:19:56 +00001346 except py_compile.PyCompileError as err:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001347 print(err.msg)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001348 fname = file_pyc
1349 else:
1350 fname = file_pyc
1351 archivename = os.path.split(fname)[1]
1352 if basename:
1353 archivename = "%s/%s" % (basename, archivename)
1354 return (fname, archivename)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001355
1356
1357def main(args = None):
1358 import textwrap
1359 USAGE=textwrap.dedent("""\
1360 Usage:
1361 zipfile.py -l zipfile.zip # Show listing of a zipfile
1362 zipfile.py -t zipfile.zip # Test if a zipfile is valid
1363 zipfile.py -e zipfile.zip target # Extract zipfile into target dir
1364 zipfile.py -c zipfile.zip src ... # Create zipfile from sources
1365 """)
1366 if args is None:
1367 args = sys.argv[1:]
1368
1369 if not args or args[0] not in ('-l', '-c', '-e', '-t'):
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001370 print(USAGE)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001371 sys.exit(1)
1372
1373 if args[0] == '-l':
1374 if len(args) != 2:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001375 print(USAGE)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001376 sys.exit(1)
1377 zf = ZipFile(args[1], 'r')
1378 zf.printdir()
1379 zf.close()
1380
1381 elif args[0] == '-t':
1382 if len(args) != 2:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001383 print(USAGE)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001384 sys.exit(1)
1385 zf = ZipFile(args[1], 'r')
1386 zf.testzip()
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001387 print("Done testing")
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001388
1389 elif args[0] == '-e':
1390 if len(args) != 3:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001391 print(USAGE)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001392 sys.exit(1)
1393
1394 zf = ZipFile(args[1], 'r')
1395 out = args[2]
1396 for path in zf.namelist():
1397 if path.startswith('./'):
1398 tgt = os.path.join(out, path[2:])
1399 else:
1400 tgt = os.path.join(out, path)
1401
1402 tgtdir = os.path.dirname(tgt)
1403 if not os.path.exists(tgtdir):
1404 os.makedirs(tgtdir)
Benjamin Petersonfa0d7032009-06-01 22:42:33 +00001405 with open(tgt, 'wb') as fp:
1406 fp.write(zf.read(path))
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001407 zf.close()
1408
1409 elif args[0] == '-c':
1410 if len(args) < 3:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001411 print(USAGE)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001412 sys.exit(1)
1413
1414 def addToZip(zf, path, zippath):
1415 if os.path.isfile(path):
1416 zf.write(path, zippath, ZIP_DEFLATED)
1417 elif os.path.isdir(path):
1418 for nm in os.listdir(path):
1419 addToZip(zf,
1420 os.path.join(path, nm), os.path.join(zippath, nm))
1421 # else: ignore
1422
1423 zf = ZipFile(args[1], 'w', allowZip64=True)
1424 for src in args[2:]:
1425 addToZip(zf, src, os.path.basename(src))
1426
1427 zf.close()
1428
1429if __name__ == "__main__":
1430 main()