blob: a54a354d0ce9bb4a44a9e753dff132a53f46eaa0 [file] [log] [blame]
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001"""
2Read and write ZIP files.
Guido van Rossumd6ca5462007-05-22 01:29:33 +00003
4XXX references to utf-8 need further investigation.
Thomas Wouters0e3f5912006-08-11 14:57:12 +00005"""
Christian Heimes790c8232008-01-07 21:14:23 +00006import struct, os, time, sys, shutil
Martin v. Löwis59e47792009-01-24 14:10:07 +00007import binascii, io, stat
Guido van Rossum32abe6f2000-03-31 17:30:02 +00008
9try:
Tim Peterse1190062001-01-15 03:34:38 +000010 import zlib # We may need its compression method
Christian Heimesd5e2b6f2008-03-19 21:50:51 +000011 crc32 = zlib.crc32
Guido van Rossum9c673f32001-04-10 15:37:12 +000012except ImportError:
Guido van Rossum32abe6f2000-03-31 17:30:02 +000013 zlib = None
Christian Heimesd5e2b6f2008-03-19 21:50:51 +000014 crc32 = binascii.crc32
Guido van Rossum32abe6f2000-03-31 17:30:02 +000015
Skip Montanaro40fc1602001-03-01 04:27:19 +000016__all__ = ["BadZipfile", "error", "ZIP_STORED", "ZIP_DEFLATED", "is_zipfile",
Thomas Wouters0e3f5912006-08-11 14:57:12 +000017 "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile" ]
Skip Montanaro40fc1602001-03-01 04:27:19 +000018
Fred Drake5db246d2000-09-29 20:44:48 +000019class BadZipfile(Exception):
Guido van Rossum32abe6f2000-03-31 17:30:02 +000020 pass
Thomas Wouters0e3f5912006-08-11 14:57:12 +000021
22
23class LargeZipFile(Exception):
24 """
25 Raised when writing a zipfile, the zipfile requires ZIP64 extensions
26 and those extensions are disabled.
27 """
28
Tim Peterse1190062001-01-15 03:34:38 +000029error = BadZipfile # The exception raised by this module
Guido van Rossum32abe6f2000-03-31 17:30:02 +000030
Amaury Forgeot d'Arc0c3f8a42009-01-17 16:42:26 +000031ZIP64_LIMIT = (1 << 31) - 1
Martin v. Löwisb09b8442008-07-03 14:13:42 +000032ZIP_FILECOUNT_LIMIT = 1 << 16
33ZIP_MAX_COMMENT = (1 << 16) - 1
Thomas Wouters0e3f5912006-08-11 14:57:12 +000034
Guido van Rossum32abe6f2000-03-31 17:30:02 +000035# constants for Zip file compression methods
36ZIP_STORED = 0
37ZIP_DEFLATED = 8
38# Other ZIP compression methods not supported
39
Martin v. Löwisb09b8442008-07-03 14:13:42 +000040# Below are some formats and associated data for reading/writing headers using
41# the struct module. The names and structures of headers/records are those used
42# in the PKWARE description of the ZIP file format:
43# http://www.pkware.com/documents/casestudies/APPNOTE.TXT
44# (URL valid as of January 2008)
Thomas Wouters0e3f5912006-08-11 14:57:12 +000045
Martin v. Löwisb09b8442008-07-03 14:13:42 +000046# The "end of central directory" structure, magic number, size, and indices
47# (section V.I in the format document)
Georg Brandl2ee470f2008-07-16 12:55:28 +000048structEndArchive = b"<4s4H2LH"
49stringEndArchive = b"PK\005\006"
50sizeEndCentDir = struct.calcsize(structEndArchive)
Martin v. Löwisb09b8442008-07-03 14:13:42 +000051
52_ECD_SIGNATURE = 0
53_ECD_DISK_NUMBER = 1
54_ECD_DISK_START = 2
55_ECD_ENTRIES_THIS_DISK = 3
56_ECD_ENTRIES_TOTAL = 4
57_ECD_SIZE = 5
58_ECD_OFFSET = 6
59_ECD_COMMENT_SIZE = 7
60# These last two indices are not part of the structure as defined in the
61# spec, but they are used internally by this module as a convenience
62_ECD_COMMENT = 8
63_ECD_LOCATION = 9
64
65# The "central directory" structure, magic number, size, and indices
66# of entries in the structure (section V.F in the format document)
67structCentralDir = "<4s4B4HL2L5H2L"
Georg Brandl2ee470f2008-07-16 12:55:28 +000068stringCentralDir = b"PK\001\002"
Martin v. Löwisb09b8442008-07-03 14:13:42 +000069sizeCentralDir = struct.calcsize(structCentralDir)
70
Fred Drake3e038e52001-02-28 17:56:26 +000071# indexes of entries in the central directory structure
72_CD_SIGNATURE = 0
73_CD_CREATE_VERSION = 1
74_CD_CREATE_SYSTEM = 2
75_CD_EXTRACT_VERSION = 3
Martin v. Löwisb09b8442008-07-03 14:13:42 +000076_CD_EXTRACT_SYSTEM = 4
Fred Drake3e038e52001-02-28 17:56:26 +000077_CD_FLAG_BITS = 5
78_CD_COMPRESS_TYPE = 6
79_CD_TIME = 7
80_CD_DATE = 8
81_CD_CRC = 9
82_CD_COMPRESSED_SIZE = 10
83_CD_UNCOMPRESSED_SIZE = 11
84_CD_FILENAME_LENGTH = 12
85_CD_EXTRA_FIELD_LENGTH = 13
86_CD_COMMENT_LENGTH = 14
87_CD_DISK_NUMBER_START = 15
88_CD_INTERNAL_FILE_ATTRIBUTES = 16
89_CD_EXTERNAL_FILE_ATTRIBUTES = 17
90_CD_LOCAL_HEADER_OFFSET = 18
91
Martin v. Löwisb09b8442008-07-03 14:13:42 +000092# The "local file header" structure, magic number, size, and indices
93# (section V.A in the format document)
94structFileHeader = "<4s2B4HL2L2H"
Georg Brandl2ee470f2008-07-16 12:55:28 +000095stringFileHeader = b"PK\003\004"
Martin v. Löwisb09b8442008-07-03 14:13:42 +000096sizeFileHeader = struct.calcsize(structFileHeader)
97
Fred Drake3e038e52001-02-28 17:56:26 +000098_FH_SIGNATURE = 0
99_FH_EXTRACT_VERSION = 1
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000100_FH_EXTRACT_SYSTEM = 2
Fred Drake3e038e52001-02-28 17:56:26 +0000101_FH_GENERAL_PURPOSE_FLAG_BITS = 3
102_FH_COMPRESSION_METHOD = 4
103_FH_LAST_MOD_TIME = 5
104_FH_LAST_MOD_DATE = 6
105_FH_CRC = 7
106_FH_COMPRESSED_SIZE = 8
107_FH_UNCOMPRESSED_SIZE = 9
108_FH_FILENAME_LENGTH = 10
109_FH_EXTRA_FIELD_LENGTH = 11
110
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000111# The "Zip64 end of central directory locator" structure, magic number, and size
Georg Brandl2ee470f2008-07-16 12:55:28 +0000112structEndArchive64Locator = "<4sLQL"
113stringEndArchive64Locator = b"PK\x06\x07"
114sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator)
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000115
116# The "Zip64 end of central directory" record, magic number, size, and indices
117# (section V.G in the format document)
Georg Brandl2ee470f2008-07-16 12:55:28 +0000118structEndArchive64 = "<4sQ2H2L4Q"
119stringEndArchive64 = b"PK\x06\x06"
120sizeEndCentDir64 = struct.calcsize(structEndArchive64)
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000121
122_CD64_SIGNATURE = 0
123_CD64_DIRECTORY_RECSIZE = 1
124_CD64_CREATE_VERSION = 2
125_CD64_EXTRACT_VERSION = 3
126_CD64_DISK_NUMBER = 4
127_CD64_DISK_NUMBER_START = 5
128_CD64_NUMBER_ENTRIES_THIS_DISK = 6
129_CD64_NUMBER_ENTRIES_TOTAL = 7
130_CD64_DIRECTORY_SIZE = 8
131_CD64_OFFSET_START_CENTDIR = 9
132
Antoine Pitroudb5fe662008-12-27 15:50:40 +0000133def _check_zipfile(fp):
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000134 try:
Antoine Pitroudb5fe662008-12-27 15:50:40 +0000135 if _EndRecData(fp):
136 return True # file has correct magic number
Fred Drake7e473802001-05-11 19:52:57 +0000137 except IOError:
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000138 pass
Guido van Rossum8ca162f2002-04-07 06:36:23 +0000139 return False
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000140
Antoine Pitroudb5fe662008-12-27 15:50:40 +0000141def is_zipfile(filename):
142 """Quickly see if a file is a ZIP file by checking the magic number.
143
144 The filename argument may be a file or file-like object too.
145 """
146 result = False
147 try:
148 if hasattr(filename, "read"):
149 result = _check_zipfile(fp=filename)
150 else:
151 with open(filename, "rb") as fp:
152 result = _check_zipfile(fp)
153 except IOError:
154 pass
155 return result
156
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000157def _EndRecData64(fpin, offset, endrec):
158 """
159 Read the ZIP64 end-of-archive records and use that to update endrec
160 """
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000161 fpin.seek(offset - sizeEndCentDir64Locator, 2)
162 data = fpin.read(sizeEndCentDir64Locator)
Georg Brandl2ee470f2008-07-16 12:55:28 +0000163 sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, data)
164 if sig != stringEndArchive64Locator:
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000165 return endrec
166
167 if diskno != 0 or disks != 1:
168 raise BadZipfile("zipfiles that span multiple disks are not supported")
169
170 # Assume no 'zip64 extensible data'
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000171 fpin.seek(offset - sizeEndCentDir64Locator - sizeEndCentDir64, 2)
172 data = fpin.read(sizeEndCentDir64)
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000173 sig, sz, create_version, read_version, disk_num, disk_dir, \
174 dircount, dircount2, dirsize, diroffset = \
Georg Brandl2ee470f2008-07-16 12:55:28 +0000175 struct.unpack(structEndArchive64, data)
176 if sig != stringEndArchive64:
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000177 return endrec
178
179 # Update the original endrec using data from the ZIP64 record
Antoine Pitrou9e4fdf42008-09-05 23:43:02 +0000180 endrec[_ECD_SIGNATURE] = sig
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000181 endrec[_ECD_DISK_NUMBER] = disk_num
182 endrec[_ECD_DISK_START] = disk_dir
183 endrec[_ECD_ENTRIES_THIS_DISK] = dircount
184 endrec[_ECD_ENTRIES_TOTAL] = dircount2
185 endrec[_ECD_SIZE] = dirsize
186 endrec[_ECD_OFFSET] = diroffset
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000187 return endrec
188
189
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000190def _EndRecData(fpin):
191 """Return data from the "End of Central Directory" record, or None.
192
193 The data is a list of the nine items in the ZIP "End of central dir"
194 record followed by a tenth item, the file seek offset of this record."""
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000195
196 # Determine file size
197 fpin.seek(0, 2)
198 filesize = fpin.tell()
199
200 # Check to see if this is ZIP file with no archive comment (the
201 # "end of central directory" structure should be the last item in the
202 # file if this is the case).
203 fpin.seek(-sizeEndCentDir, 2)
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000204 data = fpin.read()
Georg Brandl2ee470f2008-07-16 12:55:28 +0000205 if data[0:4] == stringEndArchive and data[-2:] == b"\000\000":
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000206 # the signature is correct and there's no comment, unpack structure
Georg Brandl2ee470f2008-07-16 12:55:28 +0000207 endrec = struct.unpack(structEndArchive, data)
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000208 endrec=list(endrec)
209
210 # Append a blank comment and record start offset
211 endrec.append(b"")
212 endrec.append(filesize - sizeEndCentDir)
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000213
Amaury Forgeot d'Arcd3fb4bb2009-01-18 00:29:02 +0000214 # Try to read the "Zip64 end of central directory" structure
215 return _EndRecData64(fpin, -sizeEndCentDir, endrec)
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000216
217 # Either this is not a ZIP file, or it is a ZIP file with an archive
218 # comment. Search the end of the file for the "end of central directory"
219 # record signature. The comment is the last item in the ZIP file and may be
220 # up to 64K long. It is assumed that the "end of central directory" magic
221 # number does not appear in the comment.
222 maxCommentStart = max(filesize - (1 << 16) - sizeEndCentDir, 0)
223 fpin.seek(maxCommentStart, 0)
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000224 data = fpin.read()
Georg Brandl2ee470f2008-07-16 12:55:28 +0000225 start = data.rfind(stringEndArchive)
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000226 if start >= 0:
227 # found the magic number; attempt to unpack and interpret
228 recData = data[start:start+sizeEndCentDir]
Georg Brandl2ee470f2008-07-16 12:55:28 +0000229 endrec = list(struct.unpack(structEndArchive, recData))
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000230 comment = data[start+sizeEndCentDir:]
231 # check that comment length is correct
232 if endrec[_ECD_COMMENT_SIZE] == len(comment):
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000233 # Append the archive comment and start offset
234 endrec.append(comment)
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000235 endrec.append(maxCommentStart + start)
Amaury Forgeot d'Arcd3fb4bb2009-01-18 00:29:02 +0000236
237 # Try to read the "Zip64 end of central directory" structure
238 return _EndRecData64(fpin, maxCommentStart + start - filesize,
239 endrec)
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000240
241 # Unable to find a valid end of central directory structure
242 return
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000243
Fred Drake484d7352000-10-02 21:14:52 +0000244
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000245class ZipInfo (object):
Fred Drake484d7352000-10-02 21:14:52 +0000246 """Class with attributes describing each file in the ZIP archive."""
247
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000248 __slots__ = (
249 'orig_filename',
250 'filename',
251 'date_time',
252 'compress_type',
253 'comment',
254 'extra',
255 'create_system',
256 'create_version',
257 'extract_version',
258 'reserved',
259 'flag_bits',
260 'volume',
261 'internal_attr',
262 'external_attr',
263 'header_offset',
264 'CRC',
265 'compress_size',
266 'file_size',
Christian Heimesfdab48e2008-01-20 09:06:41 +0000267 '_raw_time',
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000268 )
269
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000270 def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
Greg Ward8e36d282003-06-18 00:53:06 +0000271 self.orig_filename = filename # Original file name in archive
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000272
273 # Terminate the file name at the first null byte. Null bytes in file
274 # names are used as tricks by viruses in archives.
Greg Ward8e36d282003-06-18 00:53:06 +0000275 null_byte = filename.find(chr(0))
276 if null_byte >= 0:
277 filename = filename[0:null_byte]
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000278 # This is used to ensure paths in generated ZIP files always use
279 # forward slashes as the directory separator, as required by the
280 # ZIP format specification.
281 if os.sep != "/" and os.sep in filename:
Greg Ward8e36d282003-06-18 00:53:06 +0000282 filename = filename.replace(os.sep, "/")
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000283
Greg Ward8e36d282003-06-18 00:53:06 +0000284 self.filename = filename # Normalized file name
Tim Peterse1190062001-01-15 03:34:38 +0000285 self.date_time = date_time # year, month, day, hour, min, sec
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000286 # Standard values:
Tim Peterse1190062001-01-15 03:34:38 +0000287 self.compress_type = ZIP_STORED # Type of compression for the file
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000288 self.comment = b"" # Comment for each file
289 self.extra = b"" # ZIP extra data
Martin v. Löwis00756902006-02-05 17:09:41 +0000290 if sys.platform == 'win32':
291 self.create_system = 0 # System which created ZIP archive
292 else:
293 # Assume everything else is unix-y
294 self.create_system = 3 # System which created ZIP archive
Tim Peterse1190062001-01-15 03:34:38 +0000295 self.create_version = 20 # Version which created ZIP archive
296 self.extract_version = 20 # Version needed to extract archive
297 self.reserved = 0 # Must be zero
298 self.flag_bits = 0 # ZIP flag bits
299 self.volume = 0 # Volume number of file header
300 self.internal_attr = 0 # Internal attributes
301 self.external_attr = 0 # External file attributes
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000302 # Other attributes are set by class ZipFile:
Tim Peterse1190062001-01-15 03:34:38 +0000303 # header_offset Byte offset to the file header
Tim Peterse1190062001-01-15 03:34:38 +0000304 # CRC CRC-32 of the uncompressed file
305 # compress_size Size of the compressed file
306 # file_size Size of the uncompressed file
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000307
308 def FileHeader(self):
Fred Drake484d7352000-10-02 21:14:52 +0000309 """Return the per-file header as a string."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000310 dt = self.date_time
311 dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
Tim Peters3caca232001-12-06 06:23:26 +0000312 dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000313 if self.flag_bits & 0x08:
Tim Peterse1190062001-01-15 03:34:38 +0000314 # Set these to zero because we write them after the file data
315 CRC = compress_size = file_size = 0
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000316 else:
Tim Peterse1190062001-01-15 03:34:38 +0000317 CRC = self.CRC
318 compress_size = self.compress_size
319 file_size = self.file_size
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000320
321 extra = self.extra
322
323 if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
324 # File is larger than what fits into a 4 byte integer,
325 # fall back to the ZIP64 extension
Christian Heimesd5e2b6f2008-03-19 21:50:51 +0000326 fmt = '<HHQQ'
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000327 extra = extra + struct.pack(fmt,
328 1, struct.calcsize(fmt)-4, file_size, compress_size)
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000329 file_size = 0xffffffff
330 compress_size = 0xffffffff
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000331 self.extract_version = max(45, self.extract_version)
332 self.create_version = max(45, self.extract_version)
333
Martin v. Löwis8570f6a2008-05-05 17:44:38 +0000334 filename, flag_bits = self._encodeFilenameFlags()
Georg Brandl2ee470f2008-07-16 12:55:28 +0000335 header = struct.pack(structFileHeader, stringFileHeader,
Martin v. Löwis8570f6a2008-05-05 17:44:38 +0000336 self.extract_version, self.reserved, flag_bits,
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000337 self.compress_type, dostime, dosdate, CRC,
338 compress_size, file_size,
Martin v. Löwis8570f6a2008-05-05 17:44:38 +0000339 len(filename), len(extra))
340 return header + filename + extra
341
342 def _encodeFilenameFlags(self):
343 try:
344 return self.filename.encode('ascii'), self.flag_bits
345 except UnicodeEncodeError:
346 return self.filename.encode('utf-8'), self.flag_bits | 0x800
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000347
348 def _decodeExtra(self):
349 # Try to decode the extra field.
350 extra = self.extra
351 unpack = struct.unpack
352 while extra:
Christian Heimesd5e2b6f2008-03-19 21:50:51 +0000353 tp, ln = unpack('<HH', extra[:4])
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000354 if tp == 1:
355 if ln >= 24:
Christian Heimesd5e2b6f2008-03-19 21:50:51 +0000356 counts = unpack('<QQQ', extra[4:28])
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000357 elif ln == 16:
Christian Heimesd5e2b6f2008-03-19 21:50:51 +0000358 counts = unpack('<QQ', extra[4:20])
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000359 elif ln == 8:
Christian Heimesd5e2b6f2008-03-19 21:50:51 +0000360 counts = unpack('<Q', extra[4:12])
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000361 elif ln == 0:
362 counts = ()
363 else:
Collin Winterce36ad82007-08-30 01:19:48 +0000364 raise RuntimeError("Corrupt extra field %s"%(ln,))
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000365
366 idx = 0
367
368 # ZIP64 extension (large files and/or large archives)
Christian Heimesd5e2b6f2008-03-19 21:50:51 +0000369 if self.file_size in (0xffffffffffffffff, 0xffffffff):
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000370 self.file_size = counts[idx]
371 idx += 1
372
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000373 if self.compress_size == 0xFFFFFFFF:
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000374 self.compress_size = counts[idx]
375 idx += 1
376
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000377 if self.header_offset == 0xffffffff:
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000378 old = self.header_offset
379 self.header_offset = counts[idx]
380 idx+=1
381
382 extra = extra[ln+4:]
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000383
384
Thomas Wouterscf297e42007-02-23 15:07:44 +0000385class _ZipDecrypter:
386 """Class to handle decryption of files stored within a ZIP archive.
387
388 ZIP supports a password-based form of encryption. Even though known
389 plaintext attacks have been found against it, it is still useful
Christian Heimesfdab48e2008-01-20 09:06:41 +0000390 to be able to get data out of such a file.
Thomas Wouterscf297e42007-02-23 15:07:44 +0000391
392 Usage:
393 zd = _ZipDecrypter(mypwd)
394 plain_char = zd(cypher_char)
395 plain_text = map(zd, cypher_text)
396 """
397
398 def _GenerateCRCTable():
399 """Generate a CRC-32 table.
400
401 ZIP encryption uses the CRC32 one-byte primitive for scrambling some
402 internal keys. We noticed that a direct implementation is faster than
403 relying on binascii.crc32().
404 """
405 poly = 0xedb88320
406 table = [0] * 256
407 for i in range(256):
408 crc = i
409 for j in range(8):
410 if crc & 1:
411 crc = ((crc >> 1) & 0x7FFFFFFF) ^ poly
412 else:
413 crc = ((crc >> 1) & 0x7FFFFFFF)
414 table[i] = crc
415 return table
416 crctable = _GenerateCRCTable()
417
418 def _crc32(self, ch, crc):
419 """Compute the CRC32 primitive on one byte."""
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000420 return ((crc >> 8) & 0xffffff) ^ self.crctable[(crc ^ ch) & 0xff]
Thomas Wouterscf297e42007-02-23 15:07:44 +0000421
422 def __init__(self, pwd):
423 self.key0 = 305419896
424 self.key1 = 591751049
425 self.key2 = 878082192
426 for p in pwd:
427 self._UpdateKeys(p)
428
429 def _UpdateKeys(self, c):
430 self.key0 = self._crc32(c, self.key0)
431 self.key1 = (self.key1 + (self.key0 & 255)) & 4294967295
432 self.key1 = (self.key1 * 134775813 + 1) & 4294967295
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000433 self.key2 = self._crc32((self.key1 >> 24) & 255, self.key2)
Thomas Wouterscf297e42007-02-23 15:07:44 +0000434
435 def __call__(self, c):
436 """Decrypt a single character."""
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000437 assert isinstance(c, int)
Thomas Wouterscf297e42007-02-23 15:07:44 +0000438 k = self.key2 | 2
439 c = c ^ (((k * (k^1)) >> 8) & 255)
Thomas Wouterscf297e42007-02-23 15:07:44 +0000440 self._UpdateKeys(c)
441 return c
442
Guido van Rossumd8faa362007-04-27 19:54:29 +0000443class ZipExtFile:
444 """File-like object for reading an archive member.
445 Is returned by ZipFile.open().
446 """
447
448 def __init__(self, fileobj, zipinfo, decrypt=None):
449 self.fileobj = fileobj
450 self.decrypter = decrypt
451 self.bytes_read = 0
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000452 self.rawbuffer = b''
453 self.readbuffer = b''
454 self.linebuffer = b''
Guido van Rossumd8faa362007-04-27 19:54:29 +0000455 self.eof = False
456 self.univ_newlines = False
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000457 self.nlSeps = (b"\n", )
458 self.lastdiscard = b''
Guido van Rossumd8faa362007-04-27 19:54:29 +0000459
460 self.compress_type = zipinfo.compress_type
461 self.compress_size = zipinfo.compress_size
462
463 self.closed = False
464 self.mode = "r"
465 self.name = zipinfo.filename
466
467 # read from compressed files in 64k blocks
468 self.compreadsize = 64*1024
469 if self.compress_type == ZIP_DEFLATED:
470 self.dc = zlib.decompressobj(-15)
471
472 def set_univ_newlines(self, univ_newlines):
473 self.univ_newlines = univ_newlines
474
475 # pick line separator char(s) based on universal newlines flag
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000476 self.nlSeps = (b"\n", )
Guido van Rossumd8faa362007-04-27 19:54:29 +0000477 if self.univ_newlines:
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000478 self.nlSeps = (b"\r\n", b"\r", b"\n")
Guido van Rossumd8faa362007-04-27 19:54:29 +0000479
480 def __iter__(self):
481 return self
482
483 def __next__(self):
484 nextline = self.readline()
485 if not nextline:
486 raise StopIteration()
487
488 return nextline
489
490 def close(self):
491 self.closed = True
492
493 def _checkfornewline(self):
494 nl, nllen = -1, -1
495 if self.linebuffer:
496 # ugly check for cases where half of an \r\n pair was
497 # read on the last pass, and the \r was discarded. In this
498 # case we just throw away the \n at the start of the buffer.
Guido van Rossum814661e2007-07-18 22:07:29 +0000499 if (self.lastdiscard, self.linebuffer[:1]) == (b'\r', b'\n'):
Guido van Rossumd8faa362007-04-27 19:54:29 +0000500 self.linebuffer = self.linebuffer[1:]
501
502 for sep in self.nlSeps:
503 nl = self.linebuffer.find(sep)
504 if nl >= 0:
505 nllen = len(sep)
506 return nl, nllen
507
508 return nl, nllen
509
510 def readline(self, size = -1):
511 """Read a line with approx. size. If size is negative,
512 read a whole line.
513 """
514 if size < 0:
Christian Heimesa37d4c62007-12-04 23:02:19 +0000515 size = sys.maxsize
Guido van Rossumd8faa362007-04-27 19:54:29 +0000516 elif size == 0:
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000517 return b''
Guido van Rossumd8faa362007-04-27 19:54:29 +0000518
519 # check for a newline already in buffer
520 nl, nllen = self._checkfornewline()
521
522 if nl >= 0:
523 # the next line was already in the buffer
524 nl = min(nl, size)
525 else:
526 # no line break in buffer - try to read more
527 size -= len(self.linebuffer)
528 while nl < 0 and size > 0:
529 buf = self.read(min(size, 100))
530 if not buf:
531 break
532 self.linebuffer += buf
533 size -= len(buf)
534
535 # check for a newline in buffer
536 nl, nllen = self._checkfornewline()
537
538 # we either ran out of bytes in the file, or
539 # met the specified size limit without finding a newline,
540 # so return current buffer
541 if nl < 0:
542 s = self.linebuffer
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000543 self.linebuffer = b''
Guido van Rossumd8faa362007-04-27 19:54:29 +0000544 return s
545
546 buf = self.linebuffer[:nl]
547 self.lastdiscard = self.linebuffer[nl:nl + nllen]
548 self.linebuffer = self.linebuffer[nl + nllen:]
549
550 # line is always returned with \n as newline char (except possibly
551 # for a final incomplete line in the file, which is handled above).
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000552 return buf + b"\n"
Guido van Rossumd8faa362007-04-27 19:54:29 +0000553
554 def readlines(self, sizehint = -1):
555 """Return a list with all (following) lines. The sizehint parameter
556 is ignored in this implementation.
557 """
558 result = []
559 while True:
560 line = self.readline()
561 if not line: break
562 result.append(line)
563 return result
564
565 def read(self, size = None):
Guido van Rossum814661e2007-07-18 22:07:29 +0000566 # act like file obj and return empty string if size is 0
Guido van Rossumd8faa362007-04-27 19:54:29 +0000567 if size == 0:
Guido van Rossum814661e2007-07-18 22:07:29 +0000568 return b''
Guido van Rossumd8faa362007-04-27 19:54:29 +0000569
570 # determine read size
571 bytesToRead = self.compress_size - self.bytes_read
572
573 # adjust read size for encrypted files since the first 12 bytes
574 # are for the encryption/password information
575 if self.decrypter is not None:
576 bytesToRead -= 12
577
578 if size is not None and size >= 0:
579 if self.compress_type == ZIP_STORED:
580 lr = len(self.readbuffer)
581 bytesToRead = min(bytesToRead, size - lr)
582 elif self.compress_type == ZIP_DEFLATED:
583 if len(self.readbuffer) > size:
584 # the user has requested fewer bytes than we've already
585 # pulled through the decompressor; don't read any more
586 bytesToRead = 0
587 else:
588 # user will use up the buffer, so read some more
589 lr = len(self.rawbuffer)
590 bytesToRead = min(bytesToRead, self.compreadsize - lr)
591
592 # avoid reading past end of file contents
593 if bytesToRead + self.bytes_read > self.compress_size:
594 bytesToRead = self.compress_size - self.bytes_read
595
596 # try to read from file (if necessary)
597 if bytesToRead > 0:
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000598 data = self.fileobj.read(bytesToRead)
599 self.bytes_read += len(data)
600 try:
601 self.rawbuffer += data
602 except:
603 print(repr(self.fileobj), repr(self.rawbuffer),
604 repr(data))
605 raise
Guido van Rossumd8faa362007-04-27 19:54:29 +0000606
607 # handle contents of raw buffer
608 if self.rawbuffer:
609 newdata = self.rawbuffer
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000610 self.rawbuffer = b''
Guido van Rossumd8faa362007-04-27 19:54:29 +0000611
612 # decrypt new data if we were given an object to handle that
613 if newdata and self.decrypter is not None:
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000614 newdata = bytes(map(self.decrypter, newdata))
Guido van Rossumd8faa362007-04-27 19:54:29 +0000615
616 # decompress newly read data if necessary
617 if newdata and self.compress_type == ZIP_DEFLATED:
618 newdata = self.dc.decompress(newdata)
619 self.rawbuffer = self.dc.unconsumed_tail
620 if self.eof and len(self.rawbuffer) == 0:
621 # we're out of raw bytes (both from the file and
622 # the local buffer); flush just to make sure the
623 # decompressor is done
624 newdata += self.dc.flush()
625 # prevent decompressor from being used again
626 self.dc = None
627
628 self.readbuffer += newdata
629
630
631 # return what the user asked for
632 if size is None or len(self.readbuffer) <= size:
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000633 data = self.readbuffer
634 self.readbuffer = b''
Guido van Rossumd8faa362007-04-27 19:54:29 +0000635 else:
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000636 data = self.readbuffer[:size]
Guido van Rossumd8faa362007-04-27 19:54:29 +0000637 self.readbuffer = self.readbuffer[size:]
638
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000639 return data
Guido van Rossumd8faa362007-04-27 19:54:29 +0000640
641
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000642class ZipFile:
Tim Petersa19a1682001-03-29 04:36:09 +0000643 """ Class with methods to open, read, write, close, list zip files.
644
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000645 z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=False)
Tim Petersa19a1682001-03-29 04:36:09 +0000646
Fred Drake3d9091e2001-03-26 15:49:24 +0000647 file: Either the path to the file, or a file-like object.
648 If it is a path, the file will be opened and closed by ZipFile.
649 mode: The mode can be either read "r", write "w" or append "a".
650 compression: ZIP_STORED (no compression) or ZIP_DEFLATED (requires zlib).
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000651 allowZip64: if True ZipFile will create files with ZIP64 extensions when
652 needed, otherwise it will raise an exception when this would
653 be necessary.
654
Fred Drake3d9091e2001-03-26 15:49:24 +0000655 """
Fred Drake484d7352000-10-02 21:14:52 +0000656
Fred Drake90eac282001-02-28 05:29:34 +0000657 fp = None # Set here since __del__ checks it
658
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000659 def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=False):
Fred Drake484d7352000-10-02 21:14:52 +0000660 """Open the ZIP file with mode read "r", write "w" or append "a"."""
Guido van Rossumb5a755e2007-07-18 18:15:48 +0000661 if mode not in ("r", "w", "a"):
662 raise RuntimeError('ZipFile() requires mode "r", "w", or "a"')
663
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000664 if compression == ZIP_STORED:
665 pass
666 elif compression == ZIP_DEFLATED:
667 if not zlib:
Collin Winterce36ad82007-08-30 01:19:48 +0000668 raise RuntimeError(
669 "Compression requires the (missing) zlib module")
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000670 else:
Collin Winterce36ad82007-08-30 01:19:48 +0000671 raise RuntimeError("That compression method is not supported")
Guido van Rossumb5a755e2007-07-18 18:15:48 +0000672
673 self._allowZip64 = allowZip64
674 self._didModify = False
Tim Peterse1190062001-01-15 03:34:38 +0000675 self.debug = 0 # Level of printing: 0 through 3
676 self.NameToInfo = {} # Find file info given name
677 self.filelist = [] # List of ZipInfo instances for archive
678 self.compression = compression # Method of compression
Raymond Hettinger2ca7c192005-02-16 09:27:49 +0000679 self.mode = key = mode.replace('b', '')[0]
Thomas Wouterscf297e42007-02-23 15:07:44 +0000680 self.pwd = None
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000681 self.comment = b''
Tim Petersa19a1682001-03-29 04:36:09 +0000682
Fred Drake3d9091e2001-03-26 15:49:24 +0000683 # Check if we were passed a file-like object
Guido van Rossum3172c5d2007-10-16 18:12:55 +0000684 if isinstance(file, str):
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000685 # No, it's a filename
Fred Drake3d9091e2001-03-26 15:49:24 +0000686 self._filePassed = 0
687 self.filename = file
688 modeDict = {'r' : 'rb', 'w': 'wb', 'a' : 'r+b'}
Thomas Wouterscf297e42007-02-23 15:07:44 +0000689 try:
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000690 self.fp = io.open(file, modeDict[mode])
Thomas Wouterscf297e42007-02-23 15:07:44 +0000691 except IOError:
692 if mode == 'a':
693 mode = key = 'w'
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000694 self.fp = io.open(file, modeDict[mode])
Thomas Wouterscf297e42007-02-23 15:07:44 +0000695 else:
696 raise
Fred Drake3d9091e2001-03-26 15:49:24 +0000697 else:
698 self._filePassed = 1
699 self.fp = file
700 self.filename = getattr(file, 'name', None)
Tim Petersa19a1682001-03-29 04:36:09 +0000701
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000702 if key == 'r':
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000703 self._GetContents()
704 elif key == 'w':
Fred Drake3d9091e2001-03-26 15:49:24 +0000705 pass
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000706 elif key == 'a':
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000707 try: # See if file is a zip file
708 self._RealGetContents()
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000709 # seek to start of directory and overwrite
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000710 self.fp.seek(self.start_dir, 0)
711 except BadZipfile: # file is not a zip file, just append
712 self.fp.seek(0, 2)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000713 else:
Tim Peters7d3bad62001-04-04 18:56:49 +0000714 if not self._filePassed:
715 self.fp.close()
716 self.fp = None
Collin Winterce36ad82007-08-30 01:19:48 +0000717 raise RuntimeError('Mode must be "r", "w" or "a"')
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000718
719 def _GetContents(self):
Tim Peters7d3bad62001-04-04 18:56:49 +0000720 """Read the directory, making sure we close the file if the format
721 is bad."""
722 try:
723 self._RealGetContents()
724 except BadZipfile:
725 if not self._filePassed:
726 self.fp.close()
727 self.fp = None
728 raise
729
730 def _RealGetContents(self):
Fred Drake484d7352000-10-02 21:14:52 +0000731 """Read in the table of contents for the ZIP file."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000732 fp = self.fp
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000733 endrec = _EndRecData(fp)
734 if not endrec:
Collin Winterce36ad82007-08-30 01:19:48 +0000735 raise BadZipfile("File is not a zip file")
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000736 if self.debug > 1:
Guido van Rossumbe19ed72007-02-09 05:37:30 +0000737 print(endrec)
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000738 size_cd = endrec[_ECD_SIZE] # bytes in central directory
739 offset_cd = endrec[_ECD_OFFSET] # offset of central directory
740 self.comment = endrec[_ECD_COMMENT] # archive comment
741
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000742 # "concat" is zero, unless zip was concatenated to another file
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000743 concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
Antoine Pitrou9e4fdf42008-09-05 23:43:02 +0000744 if endrec[_ECD_SIGNATURE] == stringEndArchive64:
745 # If Zip64 extension structures are present, account for them
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000746 concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)
747
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000748 if self.debug > 2:
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000749 inferred = concat + offset_cd
750 print("given, inferred, offset", offset_cd, inferred, concat)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000751 # self.start_dir: Position of start of central directory
752 self.start_dir = offset_cd + concat
753 fp.seek(self.start_dir, 0)
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000754 data = fp.read(size_cd)
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000755 fp = io.BytesIO(data)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000756 total = 0
757 while total < size_cd:
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000758 centdir = fp.read(sizeCentralDir)
Georg Brandl2ee470f2008-07-16 12:55:28 +0000759 if centdir[0:4] != stringCentralDir:
Collin Winterce36ad82007-08-30 01:19:48 +0000760 raise BadZipfile("Bad magic number for central directory")
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000761 centdir = struct.unpack(structCentralDir, centdir)
762 if self.debug > 2:
Guido van Rossumbe19ed72007-02-09 05:37:30 +0000763 print(centdir)
Fred Drake3e038e52001-02-28 17:56:26 +0000764 filename = fp.read(centdir[_CD_FILENAME_LENGTH])
Martin v. Löwis8570f6a2008-05-05 17:44:38 +0000765 flags = centdir[5]
766 if flags & 0x800:
767 # UTF-8 file names extension
768 filename = filename.decode('utf-8')
769 else:
770 # Historical ZIP filename encoding
771 filename = filename.decode('cp437')
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000772 # Create ZipInfo instance to store file information
Martin v. Löwis8570f6a2008-05-05 17:44:38 +0000773 x = ZipInfo(filename)
Fred Drake3e038e52001-02-28 17:56:26 +0000774 x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
775 x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000776 x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000777 (x.create_version, x.create_system, x.extract_version, x.reserved,
778 x.flag_bits, x.compress_type, t, d,
779 x.CRC, x.compress_size, x.file_size) = centdir[1:12]
780 x.volume, x.internal_attr, x.external_attr = centdir[15:18]
781 # Convert date/time code to (year, month, day, hour, min, sec)
Christian Heimesfdab48e2008-01-20 09:06:41 +0000782 x._raw_time = t
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000783 x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
Fred Drake414ca662000-06-13 18:49:53 +0000784 t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000785
786 x._decodeExtra()
787 x.header_offset = x.header_offset + concat
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000788 self.filelist.append(x)
789 self.NameToInfo[x.filename] = x
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000790
791 # update total bytes read from central directory
792 total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH]
793 + centdir[_CD_EXTRA_FIELD_LENGTH]
794 + centdir[_CD_COMMENT_LENGTH])
795
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000796 if self.debug > 2:
Guido van Rossumbe19ed72007-02-09 05:37:30 +0000797 print("total", total)
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000798
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000799
800 def namelist(self):
Fred Drake484d7352000-10-02 21:14:52 +0000801 """Return a list of file names in the archive."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000802 l = []
803 for data in self.filelist:
804 l.append(data.filename)
805 return l
806
807 def infolist(self):
Fred Drake484d7352000-10-02 21:14:52 +0000808 """Return a list of class ZipInfo instances for files in the
809 archive."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000810 return self.filelist
811
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000812 def printdir(self, file=None):
Fred Drake484d7352000-10-02 21:14:52 +0000813 """Print a table of contents for the zip file."""
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000814 print("%-46s %19s %12s" % ("File Name", "Modified ", "Size"),
815 file=file)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000816 for zinfo in self.filelist:
Guido van Rossum7736b5b2008-01-15 21:44:53 +0000817 date = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time[:6]
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000818 print("%-46s %s %12d" % (zinfo.filename, date, zinfo.file_size),
819 file=file)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000820
821 def testzip(self):
Fred Drake484d7352000-10-02 21:14:52 +0000822 """Read all the files and check the CRC."""
Benjamin Peterson4cd6a952008-08-17 20:23:46 +0000823 chunk_size = 2 ** 20
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000824 for zinfo in self.filelist:
825 try:
Benjamin Peterson4cd6a952008-08-17 20:23:46 +0000826 # Read by chunks, to avoid an OverflowError or a
827 # MemoryError with very large embedded files.
828 f = self.open(zinfo.filename, "r")
829 while f.read(chunk_size): # Check CRC-32
830 pass
Raymond Hettingerc0fac962003-06-27 22:25:03 +0000831 except BadZipfile:
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000832 return zinfo.filename
833
834 def getinfo(self, name):
Fred Drake484d7352000-10-02 21:14:52 +0000835 """Return the instance of ZipInfo given 'name'."""
Guido van Rossumb5a755e2007-07-18 18:15:48 +0000836 info = self.NameToInfo.get(name)
837 if info is None:
838 raise KeyError(
839 'There is no item named %r in the archive' % name)
840
841 return info
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000842
Thomas Wouterscf297e42007-02-23 15:07:44 +0000843 def setpassword(self, pwd):
844 """Set default password for encrypted files."""
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000845 assert isinstance(pwd, bytes)
Thomas Wouterscf297e42007-02-23 15:07:44 +0000846 self.pwd = pwd
847
848 def read(self, name, pwd=None):
Fred Drake484d7352000-10-02 21:14:52 +0000849 """Return file bytes (as a string) for name."""
Guido van Rossumd8faa362007-04-27 19:54:29 +0000850 return self.open(name, "r", pwd).read()
851
852 def open(self, name, mode="r", pwd=None):
853 """Return file-like object for 'name'."""
854 if mode not in ("r", "U", "rU"):
Collin Winterce36ad82007-08-30 01:19:48 +0000855 raise RuntimeError('open() requires mode "r", "U", or "rU"')
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000856 if not self.fp:
Collin Winterce36ad82007-08-30 01:19:48 +0000857 raise RuntimeError(
858 "Attempt to read ZIP archive that was already closed")
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000859
Guido van Rossumd8faa362007-04-27 19:54:29 +0000860 # Only open a new file for instances where we were not
861 # given a file object in the constructor
862 if self._filePassed:
863 zef_file = self.fp
864 else:
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000865 zef_file = io.open(self.filename, 'rb')
Guido van Rossumd8faa362007-04-27 19:54:29 +0000866
Georg Brandlb533e262008-05-25 18:19:30 +0000867 # Make sure we have an info object
868 if isinstance(name, ZipInfo):
869 # 'name' is already an info object
870 zinfo = name
871 else:
872 # Get info object for name
873 zinfo = self.getinfo(name)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000874
875 zef_file.seek(zinfo.header_offset, 0)
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000876
877 # Skip the file header:
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000878 fheader = zef_file.read(sizeFileHeader)
Georg Brandl2ee470f2008-07-16 12:55:28 +0000879 if fheader[0:4] != stringFileHeader:
Collin Winterce36ad82007-08-30 01:19:48 +0000880 raise BadZipfile("Bad magic number for file header")
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000881
882 fheader = struct.unpack(structFileHeader, fheader)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000883 fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000884 if fheader[_FH_EXTRA_FIELD_LENGTH]:
Guido van Rossumd8faa362007-04-27 19:54:29 +0000885 zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000886
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000887 if fname != zinfo.orig_filename.encode("utf-8"):
Collin Winterce36ad82007-08-30 01:19:48 +0000888 raise BadZipfile(
889 'File name in directory %r and header %r differ.'
890 % (zinfo.orig_filename, fname))
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000891
Guido van Rossumd8faa362007-04-27 19:54:29 +0000892 # check for encrypted flag & handle password
893 is_encrypted = zinfo.flag_bits & 0x1
894 zd = None
Thomas Wouterscf297e42007-02-23 15:07:44 +0000895 if is_encrypted:
Guido van Rossumd8faa362007-04-27 19:54:29 +0000896 if not pwd:
897 pwd = self.pwd
898 if not pwd:
Collin Winterce36ad82007-08-30 01:19:48 +0000899 raise RuntimeError("File %s is encrypted, "
900 "password required for extraction" % name)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000901
Thomas Wouterscf297e42007-02-23 15:07:44 +0000902 zd = _ZipDecrypter(pwd)
903 # The first 12 bytes in the cypher stream is an encryption header
904 # used to strengthen the algorithm. The first 11 bytes are
905 # completely random, while the 12th contains the MSB of the CRC,
Christian Heimesfdab48e2008-01-20 09:06:41 +0000906 # or the MSB of the file time depending on the header type
Thomas Wouterscf297e42007-02-23 15:07:44 +0000907 # and is used to check the correctness of the password.
Guido van Rossumd8faa362007-04-27 19:54:29 +0000908 bytes = zef_file.read(12)
Guido van Rossumc1f779c2007-07-03 08:25:58 +0000909 h = list(map(zd, bytes[0:12]))
Christian Heimesfdab48e2008-01-20 09:06:41 +0000910 if zinfo.flag_bits & 0x8:
911 # compare against the file type from extended local headers
912 check_byte = (zinfo._raw_time >> 8) & 0xff
913 else:
914 # compare against the CRC otherwise
915 check_byte = (zinfo.CRC >> 24) & 0xff
916 if h[11] != check_byte:
917 raise RuntimeError("Bad password for file", name)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000918
919 # build and return a ZipExtFile
920 if zd is None:
921 zef = ZipExtFile(zef_file, zinfo)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000922 else:
Guido van Rossumd8faa362007-04-27 19:54:29 +0000923 zef = ZipExtFile(zef_file, zinfo, zd)
924
925 # set universal newlines on ZipExtFile if necessary
926 if "U" in mode:
927 zef.set_univ_newlines(True)
928 return zef
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000929
Christian Heimes790c8232008-01-07 21:14:23 +0000930 def extract(self, member, path=None, pwd=None):
931 """Extract a member from the archive to the current working directory,
932 using its full name. Its file information is extracted as accurately
933 as possible. `member' may be a filename or a ZipInfo object. You can
934 specify a different directory using `path'.
935 """
936 if not isinstance(member, ZipInfo):
937 member = self.getinfo(member)
938
939 if path is None:
940 path = os.getcwd()
941
942 return self._extract_member(member, path, pwd)
943
944 def extractall(self, path=None, members=None, pwd=None):
945 """Extract all members from the archive to the current working
946 directory. `path' specifies a different directory to extract to.
947 `members' is optional and must be a subset of the list returned
948 by namelist().
949 """
950 if members is None:
951 members = self.namelist()
952
953 for zipinfo in members:
954 self.extract(zipinfo, path, pwd)
955
956 def _extract_member(self, member, targetpath, pwd):
957 """Extract the ZipInfo object 'member' to a physical
958 file on the path targetpath.
959 """
960 # build the destination pathname, replacing
961 # forward slashes to platform specific separators.
Martin v. Löwis59e47792009-01-24 14:10:07 +0000962 if targetpath[-1:] in (os.path.sep, os.path.altsep):
Christian Heimes790c8232008-01-07 21:14:23 +0000963 targetpath = targetpath[:-1]
964
965 # don't include leading "/" from file name if present
Martin v. Löwis59e47792009-01-24 14:10:07 +0000966 if member.filename[0] == '/':
Christian Heimes790c8232008-01-07 21:14:23 +0000967 targetpath = os.path.join(targetpath, member.filename[1:])
968 else:
969 targetpath = os.path.join(targetpath, member.filename)
970
971 targetpath = os.path.normpath(targetpath)
972
973 # Create all upper directories if necessary.
974 upperdirs = os.path.dirname(targetpath)
975 if upperdirs and not os.path.exists(upperdirs):
976 os.makedirs(upperdirs)
977
Martin v. Löwis59e47792009-01-24 14:10:07 +0000978 if member.filename[-1] == '/':
979 os.mkdir(targetpath)
980 return targetpath
981
Georg Brandlb533e262008-05-25 18:19:30 +0000982 source = self.open(member, pwd=pwd)
Christian Heimes790c8232008-01-07 21:14:23 +0000983 target = open(targetpath, "wb")
984 shutil.copyfileobj(source, target)
985 source.close()
986 target.close()
987
988 return targetpath
989
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000990 def _writecheck(self, zinfo):
Fred Drake484d7352000-10-02 21:14:52 +0000991 """Check for errors before writing a file to the archive."""
Raymond Hettinger54f02222002-06-01 14:18:47 +0000992 if zinfo.filename in self.NameToInfo:
Tim Peterse1190062001-01-15 03:34:38 +0000993 if self.debug: # Warning for duplicate names
Guido van Rossumbe19ed72007-02-09 05:37:30 +0000994 print("Duplicate name:", zinfo.filename)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000995 if self.mode not in ("w", "a"):
Collin Winterce36ad82007-08-30 01:19:48 +0000996 raise RuntimeError('write() requires mode "w" or "a"')
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000997 if not self.fp:
Collin Winterce36ad82007-08-30 01:19:48 +0000998 raise RuntimeError(
999 "Attempt to write ZIP archive that was already closed")
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001000 if zinfo.compress_type == ZIP_DEFLATED and not zlib:
Collin Winterce36ad82007-08-30 01:19:48 +00001001 raise RuntimeError(
1002 "Compression requires the (missing) zlib module")
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001003 if zinfo.compress_type not in (ZIP_STORED, ZIP_DEFLATED):
Collin Winterce36ad82007-08-30 01:19:48 +00001004 raise RuntimeError("That compression method is not supported")
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001005 if zinfo.file_size > ZIP64_LIMIT:
1006 if not self._allowZip64:
1007 raise LargeZipFile("Filesize would require ZIP64 extensions")
1008 if zinfo.header_offset > ZIP64_LIMIT:
1009 if not self._allowZip64:
Collin Winterce36ad82007-08-30 01:19:48 +00001010 raise LargeZipFile(
1011 "Zipfile size would require ZIP64 extensions")
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001012
1013 def write(self, filename, arcname=None, compress_type=None):
Fred Drake484d7352000-10-02 21:14:52 +00001014 """Put the bytes from filename into the archive under the name
1015 arcname."""
Guido van Rossumb5a755e2007-07-18 18:15:48 +00001016 if not self.fp:
1017 raise RuntimeError(
1018 "Attempt to write to ZIP archive that was already closed")
1019
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001020 st = os.stat(filename)
Martin v. Löwis59e47792009-01-24 14:10:07 +00001021 isdir = stat.S_ISDIR(st.st_mode)
Raymond Hettinger32200ae2002-06-01 19:51:15 +00001022 mtime = time.localtime(st.st_mtime)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001023 date_time = mtime[0:6]
1024 # Create ZipInfo instance to store file information
1025 if arcname is None:
Georg Brandl8f7c54e2006-02-20 08:40:38 +00001026 arcname = filename
1027 arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
1028 while arcname[0] in (os.sep, os.altsep):
1029 arcname = arcname[1:]
Martin v. Löwis59e47792009-01-24 14:10:07 +00001030 if isdir:
1031 arcname += '/'
Georg Brandl8f7c54e2006-02-20 08:40:38 +00001032 zinfo = ZipInfo(arcname, date_time)
Guido van Rossume2a383d2007-01-15 16:59:06 +00001033 zinfo.external_attr = (st[0] & 0xFFFF) << 16 # Unix attributes
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001034 if compress_type is None:
Tim Peterse1190062001-01-15 03:34:38 +00001035 zinfo.compress_type = self.compression
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001036 else:
Tim Peterse1190062001-01-15 03:34:38 +00001037 zinfo.compress_type = compress_type
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001038
1039 zinfo.file_size = st.st_size
Finn Bock03a3bb82001-09-05 18:40:33 +00001040 zinfo.flag_bits = 0x00
Tim Peterse1190062001-01-15 03:34:38 +00001041 zinfo.header_offset = self.fp.tell() # Start of header bytes
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001042
1043 self._writecheck(zinfo)
1044 self._didModify = True
Martin v. Löwis59e47792009-01-24 14:10:07 +00001045
1046 if isdir:
1047 zinfo.file_size = 0
1048 zinfo.compress_size = 0
1049 zinfo.CRC = 0
1050 self.filelist.append(zinfo)
1051 self.NameToInfo[zinfo.filename] = zinfo
1052 self.fp.write(zinfo.FileHeader())
1053 return
1054
Guido van Rossumd6ca5462007-05-22 01:29:33 +00001055 fp = io.open(filename, "rb")
Finn Bock03a3bb82001-09-05 18:40:33 +00001056 # Must overwrite CRC and sizes with correct data later
1057 zinfo.CRC = CRC = 0
1058 zinfo.compress_size = compress_size = 0
1059 zinfo.file_size = file_size = 0
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001060 self.fp.write(zinfo.FileHeader())
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001061 if zinfo.compress_type == ZIP_DEFLATED:
1062 cmpr = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
1063 zlib.DEFLATED, -15)
1064 else:
1065 cmpr = None
1066 while 1:
1067 buf = fp.read(1024 * 8)
1068 if not buf:
1069 break
1070 file_size = file_size + len(buf)
Christian Heimesd5e2b6f2008-03-19 21:50:51 +00001071 CRC = crc32(buf, CRC) & 0xffffffff
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001072 if cmpr:
1073 buf = cmpr.compress(buf)
1074 compress_size = compress_size + len(buf)
1075 self.fp.write(buf)
1076 fp.close()
1077 if cmpr:
1078 buf = cmpr.flush()
1079 compress_size = compress_size + len(buf)
1080 self.fp.write(buf)
1081 zinfo.compress_size = compress_size
1082 else:
1083 zinfo.compress_size = file_size
1084 zinfo.CRC = CRC
1085 zinfo.file_size = file_size
Finn Bock03a3bb82001-09-05 18:40:33 +00001086 # Seek backwards and write CRC and file sizes
Tim Petersb64bec32001-09-18 02:26:39 +00001087 position = self.fp.tell() # Preserve current position in file
Finn Bock03a3bb82001-09-05 18:40:33 +00001088 self.fp.seek(zinfo.header_offset + 14, 0)
Christian Heimesd5e2b6f2008-03-19 21:50:51 +00001089 self.fp.write(struct.pack("<LLL", zinfo.CRC, zinfo.compress_size,
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001090 zinfo.file_size))
Finn Bock03a3bb82001-09-05 18:40:33 +00001091 self.fp.seek(position, 0)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001092 self.filelist.append(zinfo)
1093 self.NameToInfo[zinfo.filename] = zinfo
1094
Guido van Rossum85825dc2007-08-27 17:03:28 +00001095 def writestr(self, zinfo_or_arcname, data):
1096 """Write a file into the archive. The contents is 'data', which
1097 may be either a 'str' or a 'bytes' instance; if it is a 'str',
1098 it is encoded as UTF-8 first.
1099 'zinfo_or_arcname' is either a ZipInfo instance or
Just van Rossumb083cb32002-12-12 12:23:32 +00001100 the name of the file in the archive."""
Guido van Rossum85825dc2007-08-27 17:03:28 +00001101 if isinstance(data, str):
1102 data = data.encode("utf-8")
Just van Rossumb083cb32002-12-12 12:23:32 +00001103 if not isinstance(zinfo_or_arcname, ZipInfo):
1104 zinfo = ZipInfo(filename=zinfo_or_arcname,
Guido van Rossum7736b5b2008-01-15 21:44:53 +00001105 date_time=time.localtime(time.time())[:6])
Just van Rossumb083cb32002-12-12 12:23:32 +00001106 zinfo.compress_type = self.compression
Antoine Pitrou6e1df8d2008-07-25 19:58:18 +00001107 zinfo.external_attr = 0o600 << 16
Just van Rossumb083cb32002-12-12 12:23:32 +00001108 else:
1109 zinfo = zinfo_or_arcname
Guido van Rossumb5a755e2007-07-18 18:15:48 +00001110
1111 if not self.fp:
1112 raise RuntimeError(
1113 "Attempt to write to ZIP archive that was already closed")
1114
Guido van Rossum85825dc2007-08-27 17:03:28 +00001115 zinfo.file_size = len(data) # Uncompressed size
1116 zinfo.header_offset = self.fp.tell() # Start of header data
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001117 self._writecheck(zinfo)
1118 self._didModify = True
Christian Heimesd5e2b6f2008-03-19 21:50:51 +00001119 zinfo.CRC = crc32(data) & 0xffffffff # CRC-32 checksum
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001120 if zinfo.compress_type == ZIP_DEFLATED:
1121 co = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
1122 zlib.DEFLATED, -15)
Guido van Rossum85825dc2007-08-27 17:03:28 +00001123 data = co.compress(data) + co.flush()
1124 zinfo.compress_size = len(data) # Compressed size
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001125 else:
1126 zinfo.compress_size = zinfo.file_size
Guido van Rossum85825dc2007-08-27 17:03:28 +00001127 zinfo.header_offset = self.fp.tell() # Start of header data
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001128 self.fp.write(zinfo.FileHeader())
Guido van Rossum85825dc2007-08-27 17:03:28 +00001129 self.fp.write(data)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001130 self.fp.flush()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001131 if zinfo.flag_bits & 0x08:
Tim Peterse1190062001-01-15 03:34:38 +00001132 # Write CRC and file sizes after the file data
Brett Cannonff450f72004-07-10 19:09:20 +00001133 self.fp.write(struct.pack("<lLL", zinfo.CRC, zinfo.compress_size,
Tim Peterse1190062001-01-15 03:34:38 +00001134 zinfo.file_size))
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001135 self.filelist.append(zinfo)
1136 self.NameToInfo[zinfo.filename] = zinfo
1137
1138 def __del__(self):
Fred Drake484d7352000-10-02 21:14:52 +00001139 """Call the "close()" method in case the user forgot."""
Tim Petersd15f8bb2001-11-28 23:16:40 +00001140 self.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001141
1142 def close(self):
Fred Drake484d7352000-10-02 21:14:52 +00001143 """Close the file, and for mode "w" and "a" write the ending
1144 records."""
Tim Petersd15f8bb2001-11-28 23:16:40 +00001145 if self.fp is None:
1146 return
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001147
1148 if self.mode in ("w", "a") and self._didModify: # write ending records
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001149 count = 0
1150 pos1 = self.fp.tell()
Tim Peterse1190062001-01-15 03:34:38 +00001151 for zinfo in self.filelist: # write central directory
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001152 count = count + 1
1153 dt = zinfo.date_time
1154 dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
Tim Peters3caca232001-12-06 06:23:26 +00001155 dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001156 extra = []
1157 if zinfo.file_size > ZIP64_LIMIT \
1158 or zinfo.compress_size > ZIP64_LIMIT:
1159 extra.append(zinfo.file_size)
1160 extra.append(zinfo.compress_size)
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001161 file_size = 0xffffffff
1162 compress_size = 0xffffffff
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001163 else:
1164 file_size = zinfo.file_size
1165 compress_size = zinfo.compress_size
1166
1167 if zinfo.header_offset > ZIP64_LIMIT:
1168 extra.append(zinfo.header_offset)
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001169 header_offset = 0xffffffff
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001170 else:
1171 header_offset = zinfo.header_offset
1172
1173 extra_data = zinfo.extra
1174 if extra:
1175 # Append a ZIP64 field to the extra's
1176 extra_data = struct.pack(
Christian Heimesd5e2b6f2008-03-19 21:50:51 +00001177 '<HH' + 'Q'*len(extra),
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001178 1, 8*len(extra), *extra) + extra_data
1179
1180 extract_version = max(45, zinfo.extract_version)
1181 create_version = max(45, zinfo.create_version)
1182 else:
1183 extract_version = zinfo.extract_version
1184 create_version = zinfo.create_version
1185
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001186 try:
1187 filename, flag_bits = zinfo._encodeFilenameFlags()
1188 centdir = struct.pack(structCentralDir,
Georg Brandl2ee470f2008-07-16 12:55:28 +00001189 stringCentralDir, create_version,
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001190 zinfo.create_system, extract_version, zinfo.reserved,
1191 flag_bits, zinfo.compress_type, dostime, dosdate,
1192 zinfo.CRC, compress_size, file_size,
1193 len(filename), len(extra_data), len(zinfo.comment),
1194 0, zinfo.internal_attr, zinfo.external_attr,
1195 header_offset)
1196 except DeprecationWarning:
1197 print >>sys.stderr, (structCentralDir,
1198 stringCentralDir, create_version,
1199 zinfo.create_system, extract_version, zinfo.reserved,
1200 zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
1201 zinfo.CRC, compress_size, file_size,
1202 len(zinfo.filename), len(extra_data), len(zinfo.comment),
1203 0, zinfo.internal_attr, zinfo.external_attr,
1204 header_offset)
1205 raise
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001206 self.fp.write(centdir)
Martin v. Löwis8570f6a2008-05-05 17:44:38 +00001207 self.fp.write(filename)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001208 self.fp.write(extra_data)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001209 self.fp.write(zinfo.comment)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001210
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001211 pos2 = self.fp.tell()
1212 # Write end-of-zip-archive record
Amaury Forgeot d'Arc0c3f8a42009-01-17 16:42:26 +00001213 centDirCount = count
1214 centDirSize = pos2 - pos1
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001215 centDirOffset = pos1
Amaury Forgeot d'Arc0c3f8a42009-01-17 16:42:26 +00001216 if (centDirCount >= ZIP_FILECOUNT_LIMIT or
1217 centDirOffset > ZIP64_LIMIT or
1218 centDirSize > ZIP64_LIMIT):
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001219 # Need to write the ZIP64 end-of-archive records
1220 zip64endrec = struct.pack(
Georg Brandl2ee470f2008-07-16 12:55:28 +00001221 structEndArchive64, stringEndArchive64,
Amaury Forgeot d'Arc0c3f8a42009-01-17 16:42:26 +00001222 44, 45, 45, 0, 0, centDirCount, centDirCount,
1223 centDirSize, centDirOffset)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001224 self.fp.write(zip64endrec)
1225
1226 zip64locrec = struct.pack(
Georg Brandl2ee470f2008-07-16 12:55:28 +00001227 structEndArchive64Locator,
1228 stringEndArchive64Locator, 0, pos2, 1)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001229 self.fp.write(zip64locrec)
Amaury Forgeot d'Arc0c3f8a42009-01-17 16:42:26 +00001230 centDirCount = min(centDirCount, 0xFFFF)
1231 centDirSize = min(centDirSize, 0xFFFFFFFF)
1232 centDirOffset = min(centDirOffset, 0xFFFFFFFF)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001233
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001234 # check for valid comment length
1235 if len(self.comment) >= ZIP_MAX_COMMENT:
1236 if self.debug > 0:
1237 msg = 'Archive comment is too long; truncating to %d bytes' \
1238 % ZIP_MAX_COMMENT
1239 self.comment = self.comment[:ZIP_MAX_COMMENT]
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001240
Georg Brandl2ee470f2008-07-16 12:55:28 +00001241 endrec = struct.pack(structEndArchive, stringEndArchive,
Amaury Forgeot d'Arc0c3f8a42009-01-17 16:42:26 +00001242 0, 0, centDirCount, centDirCount,
1243 centDirSize, centDirOffset, len(self.comment))
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001244 self.fp.write(endrec)
1245 self.fp.write(self.comment)
Guido van Rossumf85af612001-04-14 16:45:14 +00001246 self.fp.flush()
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001247
Fred Drake3d9091e2001-03-26 15:49:24 +00001248 if not self._filePassed:
1249 self.fp.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001250 self.fp = None
1251
1252
1253class PyZipFile(ZipFile):
Fred Drake484d7352000-10-02 21:14:52 +00001254 """Class to create ZIP archives with Python library files and packages."""
1255
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001256 def writepy(self, pathname, basename = ""):
1257 """Add all files from "pathname" to the ZIP archive.
1258
Fred Drake484d7352000-10-02 21:14:52 +00001259 If pathname is a package directory, search the directory and
1260 all package subdirectories recursively for all *.py and enter
1261 the modules into the archive. If pathname is a plain
1262 directory, listdir *.py and enter all modules. Else, pathname
1263 must be a Python *.py file and the module will be put into the
1264 archive. Added modules are always module.pyo or module.pyc.
1265 This method will compile the module.py into module.pyc if
1266 necessary.
1267 """
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001268 dir, name = os.path.split(pathname)
1269 if os.path.isdir(pathname):
1270 initname = os.path.join(pathname, "__init__.py")
1271 if os.path.isfile(initname):
1272 # This is a package directory, add it
1273 if basename:
1274 basename = "%s/%s" % (basename, name)
1275 else:
1276 basename = name
1277 if self.debug:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001278 print("Adding package in", pathname, "as", basename)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001279 fname, arcname = self._get_codename(initname[0:-3], basename)
1280 if self.debug:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001281 print("Adding", arcname)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001282 self.write(fname, arcname)
1283 dirlist = os.listdir(pathname)
1284 dirlist.remove("__init__.py")
1285 # Add all *.py files and package subdirectories
1286 for filename in dirlist:
1287 path = os.path.join(pathname, filename)
1288 root, ext = os.path.splitext(filename)
1289 if os.path.isdir(path):
1290 if os.path.isfile(os.path.join(path, "__init__.py")):
1291 # This is a package directory, add it
1292 self.writepy(path, basename) # Recursive call
1293 elif ext == ".py":
1294 fname, arcname = self._get_codename(path[0:-3],
1295 basename)
1296 if self.debug:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001297 print("Adding", arcname)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001298 self.write(fname, arcname)
1299 else:
1300 # This is NOT a package directory, add its files at top level
1301 if self.debug:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001302 print("Adding files from directory", pathname)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001303 for filename in os.listdir(pathname):
1304 path = os.path.join(pathname, filename)
1305 root, ext = os.path.splitext(filename)
1306 if ext == ".py":
1307 fname, arcname = self._get_codename(path[0:-3],
1308 basename)
1309 if self.debug:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001310 print("Adding", arcname)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001311 self.write(fname, arcname)
1312 else:
1313 if pathname[-3:] != ".py":
Collin Winterce36ad82007-08-30 01:19:48 +00001314 raise RuntimeError(
1315 'Files added with writepy() must end with ".py"')
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001316 fname, arcname = self._get_codename(pathname[0:-3], basename)
1317 if self.debug:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001318 print("Adding file", arcname)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001319 self.write(fname, arcname)
1320
1321 def _get_codename(self, pathname, basename):
1322 """Return (filename, archivename) for the path.
1323
Fred Drake484d7352000-10-02 21:14:52 +00001324 Given a module name path, return the correct file path and
1325 archive name, compiling if necessary. For example, given
1326 /python/lib/string, return (/python/lib/string.pyc, string).
1327 """
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001328 file_py = pathname + ".py"
1329 file_pyc = pathname + ".pyc"
1330 file_pyo = pathname + ".pyo"
1331 if os.path.isfile(file_pyo) and \
Raymond Hettinger32200ae2002-06-01 19:51:15 +00001332 os.stat(file_pyo).st_mtime >= os.stat(file_py).st_mtime:
Tim Peterse1190062001-01-15 03:34:38 +00001333 fname = file_pyo # Use .pyo file
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001334 elif not os.path.isfile(file_pyc) or \
Raymond Hettinger32200ae2002-06-01 19:51:15 +00001335 os.stat(file_pyc).st_mtime < os.stat(file_py).st_mtime:
Fred Drake484d7352000-10-02 21:14:52 +00001336 import py_compile
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001337 if self.debug:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001338 print("Compiling", file_py)
Martin v. Löwis0c6774d2003-01-15 11:51:06 +00001339 try:
1340 py_compile.compile(file_py, file_pyc, None, True)
Guido van Rossumb940e112007-01-10 16:19:56 +00001341 except py_compile.PyCompileError as err:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001342 print(err.msg)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001343 fname = file_pyc
1344 else:
1345 fname = file_pyc
1346 archivename = os.path.split(fname)[1]
1347 if basename:
1348 archivename = "%s/%s" % (basename, archivename)
1349 return (fname, archivename)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001350
1351
1352def main(args = None):
1353 import textwrap
1354 USAGE=textwrap.dedent("""\
1355 Usage:
1356 zipfile.py -l zipfile.zip # Show listing of a zipfile
1357 zipfile.py -t zipfile.zip # Test if a zipfile is valid
1358 zipfile.py -e zipfile.zip target # Extract zipfile into target dir
1359 zipfile.py -c zipfile.zip src ... # Create zipfile from sources
1360 """)
1361 if args is None:
1362 args = sys.argv[1:]
1363
1364 if not args or args[0] not in ('-l', '-c', '-e', '-t'):
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001365 print(USAGE)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001366 sys.exit(1)
1367
1368 if args[0] == '-l':
1369 if len(args) != 2:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001370 print(USAGE)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001371 sys.exit(1)
1372 zf = ZipFile(args[1], 'r')
1373 zf.printdir()
1374 zf.close()
1375
1376 elif args[0] == '-t':
1377 if len(args) != 2:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001378 print(USAGE)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001379 sys.exit(1)
1380 zf = ZipFile(args[1], 'r')
1381 zf.testzip()
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001382 print("Done testing")
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001383
1384 elif args[0] == '-e':
1385 if len(args) != 3:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001386 print(USAGE)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001387 sys.exit(1)
1388
1389 zf = ZipFile(args[1], 'r')
1390 out = args[2]
1391 for path in zf.namelist():
1392 if path.startswith('./'):
1393 tgt = os.path.join(out, path[2:])
1394 else:
1395 tgt = os.path.join(out, path)
1396
1397 tgtdir = os.path.dirname(tgt)
1398 if not os.path.exists(tgtdir):
1399 os.makedirs(tgtdir)
Guido van Rossumd6ca5462007-05-22 01:29:33 +00001400 fp = io.open(tgt, 'wb')
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001401 fp.write(zf.read(path))
1402 fp.close()
1403 zf.close()
1404
1405 elif args[0] == '-c':
1406 if len(args) < 3:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001407 print(USAGE)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001408 sys.exit(1)
1409
1410 def addToZip(zf, path, zippath):
1411 if os.path.isfile(path):
1412 zf.write(path, zippath, ZIP_DEFLATED)
1413 elif os.path.isdir(path):
1414 for nm in os.listdir(path):
1415 addToZip(zf,
1416 os.path.join(path, nm), os.path.join(zippath, nm))
1417 # else: ignore
1418
1419 zf = ZipFile(args[1], 'w', allowZip64=True)
1420 for src in args[2:]:
1421 addToZip(zf, src, os.path.basename(src))
1422
1423 zf.close()
1424
1425if __name__ == "__main__":
1426 main()