blob: fda990385d0ec2aae59211ed121321776d9fc643 [file] [log] [blame]
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001"""
2Read and write ZIP files.
Guido van Rossumd6ca5462007-05-22 01:29:33 +00003
4XXX references to utf-8 need further investigation.
Thomas Wouters0e3f5912006-08-11 14:57:12 +00005"""
Christian Heimes790c8232008-01-07 21:14:23 +00006import struct, os, time, sys, shutil
Guido van Rossum68937b42007-05-18 00:51:22 +00007import binascii, io
Guido van Rossum32abe6f2000-03-31 17:30:02 +00008
9try:
Tim Peterse1190062001-01-15 03:34:38 +000010 import zlib # We may need its compression method
Christian Heimesd5e2b6f2008-03-19 21:50:51 +000011 crc32 = zlib.crc32
Guido van Rossum9c673f32001-04-10 15:37:12 +000012except ImportError:
Guido van Rossum32abe6f2000-03-31 17:30:02 +000013 zlib = None
Christian Heimesd5e2b6f2008-03-19 21:50:51 +000014 crc32 = binascii.crc32
Guido van Rossum32abe6f2000-03-31 17:30:02 +000015
Skip Montanaro40fc1602001-03-01 04:27:19 +000016__all__ = ["BadZipfile", "error", "ZIP_STORED", "ZIP_DEFLATED", "is_zipfile",
Thomas Wouters0e3f5912006-08-11 14:57:12 +000017 "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile" ]
Skip Montanaro40fc1602001-03-01 04:27:19 +000018
Fred Drake5db246d2000-09-29 20:44:48 +000019class BadZipfile(Exception):
Guido van Rossum32abe6f2000-03-31 17:30:02 +000020 pass
Thomas Wouters0e3f5912006-08-11 14:57:12 +000021
22
23class LargeZipFile(Exception):
24 """
25 Raised when writing a zipfile, the zipfile requires ZIP64 extensions
26 and those extensions are disabled.
27 """
28
Tim Peterse1190062001-01-15 03:34:38 +000029error = BadZipfile # The exception raised by this module
Guido van Rossum32abe6f2000-03-31 17:30:02 +000030
Thomas Wouters0e3f5912006-08-11 14:57:12 +000031ZIP64_LIMIT= (1 << 31) - 1
Martin v. Löwisb09b8442008-07-03 14:13:42 +000032ZIP_FILECOUNT_LIMIT = 1 << 16
33ZIP_MAX_COMMENT = (1 << 16) - 1
Thomas Wouters0e3f5912006-08-11 14:57:12 +000034
Guido van Rossum32abe6f2000-03-31 17:30:02 +000035# constants for Zip file compression methods
36ZIP_STORED = 0
37ZIP_DEFLATED = 8
38# Other ZIP compression methods not supported
39
Martin v. Löwisb09b8442008-07-03 14:13:42 +000040# Below are some formats and associated data for reading/writing headers using
41# the struct module. The names and structures of headers/records are those used
42# in the PKWARE description of the ZIP file format:
43# http://www.pkware.com/documents/casestudies/APPNOTE.TXT
44# (URL valid as of January 2008)
Thomas Wouters0e3f5912006-08-11 14:57:12 +000045
Martin v. Löwisb09b8442008-07-03 14:13:42 +000046# The "end of central directory" structure, magic number, size, and indices
47# (section V.I in the format document)
Georg Brandl2ee470f2008-07-16 12:55:28 +000048structEndArchive = b"<4s4H2LH"
49stringEndArchive = b"PK\005\006"
50sizeEndCentDir = struct.calcsize(structEndArchive)
Martin v. Löwisb09b8442008-07-03 14:13:42 +000051
52_ECD_SIGNATURE = 0
53_ECD_DISK_NUMBER = 1
54_ECD_DISK_START = 2
55_ECD_ENTRIES_THIS_DISK = 3
56_ECD_ENTRIES_TOTAL = 4
57_ECD_SIZE = 5
58_ECD_OFFSET = 6
59_ECD_COMMENT_SIZE = 7
60# These last two indices are not part of the structure as defined in the
61# spec, but they are used internally by this module as a convenience
62_ECD_COMMENT = 8
63_ECD_LOCATION = 9
64
65# The "central directory" structure, magic number, size, and indices
66# of entries in the structure (section V.F in the format document)
67structCentralDir = "<4s4B4HL2L5H2L"
Georg Brandl2ee470f2008-07-16 12:55:28 +000068stringCentralDir = b"PK\001\002"
Martin v. Löwisb09b8442008-07-03 14:13:42 +000069sizeCentralDir = struct.calcsize(structCentralDir)
70
Fred Drake3e038e52001-02-28 17:56:26 +000071# indexes of entries in the central directory structure
72_CD_SIGNATURE = 0
73_CD_CREATE_VERSION = 1
74_CD_CREATE_SYSTEM = 2
75_CD_EXTRACT_VERSION = 3
Martin v. Löwisb09b8442008-07-03 14:13:42 +000076_CD_EXTRACT_SYSTEM = 4
Fred Drake3e038e52001-02-28 17:56:26 +000077_CD_FLAG_BITS = 5
78_CD_COMPRESS_TYPE = 6
79_CD_TIME = 7
80_CD_DATE = 8
81_CD_CRC = 9
82_CD_COMPRESSED_SIZE = 10
83_CD_UNCOMPRESSED_SIZE = 11
84_CD_FILENAME_LENGTH = 12
85_CD_EXTRA_FIELD_LENGTH = 13
86_CD_COMMENT_LENGTH = 14
87_CD_DISK_NUMBER_START = 15
88_CD_INTERNAL_FILE_ATTRIBUTES = 16
89_CD_EXTERNAL_FILE_ATTRIBUTES = 17
90_CD_LOCAL_HEADER_OFFSET = 18
91
Martin v. Löwisb09b8442008-07-03 14:13:42 +000092# The "local file header" structure, magic number, size, and indices
93# (section V.A in the format document)
94structFileHeader = "<4s2B4HL2L2H"
Georg Brandl2ee470f2008-07-16 12:55:28 +000095stringFileHeader = b"PK\003\004"
Martin v. Löwisb09b8442008-07-03 14:13:42 +000096sizeFileHeader = struct.calcsize(structFileHeader)
97
Fred Drake3e038e52001-02-28 17:56:26 +000098_FH_SIGNATURE = 0
99_FH_EXTRACT_VERSION = 1
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000100_FH_EXTRACT_SYSTEM = 2
Fred Drake3e038e52001-02-28 17:56:26 +0000101_FH_GENERAL_PURPOSE_FLAG_BITS = 3
102_FH_COMPRESSION_METHOD = 4
103_FH_LAST_MOD_TIME = 5
104_FH_LAST_MOD_DATE = 6
105_FH_CRC = 7
106_FH_COMPRESSED_SIZE = 8
107_FH_UNCOMPRESSED_SIZE = 9
108_FH_FILENAME_LENGTH = 10
109_FH_EXTRA_FIELD_LENGTH = 11
110
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000111# The "Zip64 end of central directory locator" structure, magic number, and size
Georg Brandl2ee470f2008-07-16 12:55:28 +0000112structEndArchive64Locator = "<4sLQL"
113stringEndArchive64Locator = b"PK\x06\x07"
114sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator)
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000115
116# The "Zip64 end of central directory" record, magic number, size, and indices
117# (section V.G in the format document)
Georg Brandl2ee470f2008-07-16 12:55:28 +0000118structEndArchive64 = "<4sQ2H2L4Q"
119stringEndArchive64 = b"PK\x06\x06"
120sizeEndCentDir64 = struct.calcsize(structEndArchive64)
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000121
122_CD64_SIGNATURE = 0
123_CD64_DIRECTORY_RECSIZE = 1
124_CD64_CREATE_VERSION = 2
125_CD64_EXTRACT_VERSION = 3
126_CD64_DISK_NUMBER = 4
127_CD64_DISK_NUMBER_START = 5
128_CD64_NUMBER_ENTRIES_THIS_DISK = 6
129_CD64_NUMBER_ENTRIES_TOTAL = 7
130_CD64_DIRECTORY_SIZE = 8
131_CD64_OFFSET_START_CENTDIR = 9
132
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000133def is_zipfile(filename):
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000134 """Quickly see if file is a ZIP file by checking the magic number."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000135 try:
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000136 fpin = io.open(filename, "rb")
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000137 endrec = _EndRecData(fpin)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000138 fpin.close()
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000139 if endrec:
Guido van Rossum8ca162f2002-04-07 06:36:23 +0000140 return True # file has correct magic number
Fred Drake7e473802001-05-11 19:52:57 +0000141 except IOError:
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000142 pass
Guido van Rossum8ca162f2002-04-07 06:36:23 +0000143 return False
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000144
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000145def _EndRecData64(fpin, offset, endrec):
146 """
147 Read the ZIP64 end-of-archive records and use that to update endrec
148 """
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000149 fpin.seek(offset - sizeEndCentDir64Locator, 2)
150 data = fpin.read(sizeEndCentDir64Locator)
Georg Brandl2ee470f2008-07-16 12:55:28 +0000151 sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, data)
152 if sig != stringEndArchive64Locator:
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000153 return endrec
154
155 if diskno != 0 or disks != 1:
156 raise BadZipfile("zipfiles that span multiple disks are not supported")
157
158 # Assume no 'zip64 extensible data'
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000159 fpin.seek(offset - sizeEndCentDir64Locator - sizeEndCentDir64, 2)
160 data = fpin.read(sizeEndCentDir64)
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000161 sig, sz, create_version, read_version, disk_num, disk_dir, \
162 dircount, dircount2, dirsize, diroffset = \
Georg Brandl2ee470f2008-07-16 12:55:28 +0000163 struct.unpack(structEndArchive64, data)
164 if sig != stringEndArchive64:
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000165 return endrec
166
167 # Update the original endrec using data from the ZIP64 record
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000168 endrec[_ECD_DISK_NUMBER] = disk_num
169 endrec[_ECD_DISK_START] = disk_dir
170 endrec[_ECD_ENTRIES_THIS_DISK] = dircount
171 endrec[_ECD_ENTRIES_TOTAL] = dircount2
172 endrec[_ECD_SIZE] = dirsize
173 endrec[_ECD_OFFSET] = diroffset
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000174 return endrec
175
176
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000177def _EndRecData(fpin):
178 """Return data from the "End of Central Directory" record, or None.
179
180 The data is a list of the nine items in the ZIP "End of central dir"
181 record followed by a tenth item, the file seek offset of this record."""
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000182
183 # Determine file size
184 fpin.seek(0, 2)
185 filesize = fpin.tell()
186
187 # Check to see if this is ZIP file with no archive comment (the
188 # "end of central directory" structure should be the last item in the
189 # file if this is the case).
190 fpin.seek(-sizeEndCentDir, 2)
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000191 data = fpin.read()
Georg Brandl2ee470f2008-07-16 12:55:28 +0000192 if data[0:4] == stringEndArchive and data[-2:] == b"\000\000":
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000193 # the signature is correct and there's no comment, unpack structure
Georg Brandl2ee470f2008-07-16 12:55:28 +0000194 endrec = struct.unpack(structEndArchive, data)
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000195 endrec=list(endrec)
196
197 # Append a blank comment and record start offset
198 endrec.append(b"")
199 endrec.append(filesize - sizeEndCentDir)
200 if endrec[_ECD_OFFSET] == 0xffffffff:
201 # the value for the "offset of the start of the central directory"
202 # indicates that there is a "Zip64 end of central directory"
203 # structure present, so go look for it
204 return _EndRecData64(fpin, -sizeEndCentDir, endrec)
205
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000206 return endrec
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000207
208 # Either this is not a ZIP file, or it is a ZIP file with an archive
209 # comment. Search the end of the file for the "end of central directory"
210 # record signature. The comment is the last item in the ZIP file and may be
211 # up to 64K long. It is assumed that the "end of central directory" magic
212 # number does not appear in the comment.
213 maxCommentStart = max(filesize - (1 << 16) - sizeEndCentDir, 0)
214 fpin.seek(maxCommentStart, 0)
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000215 data = fpin.read()
Georg Brandl2ee470f2008-07-16 12:55:28 +0000216 start = data.rfind(stringEndArchive)
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000217 if start >= 0:
218 # found the magic number; attempt to unpack and interpret
219 recData = data[start:start+sizeEndCentDir]
Georg Brandl2ee470f2008-07-16 12:55:28 +0000220 endrec = list(struct.unpack(structEndArchive, recData))
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000221 comment = data[start+sizeEndCentDir:]
222 # check that comment length is correct
223 if endrec[_ECD_COMMENT_SIZE] == len(comment):
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000224 # Append the archive comment and start offset
225 endrec.append(comment)
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000226 endrec.append(maxCommentStart + start)
227 if endrec[_ECD_OFFSET] == 0xffffffff:
228 # There is apparently a "Zip64 end of central directory"
229 # structure present, so go look for it
230 return _EndRecData64(fpin, start - filesize, endrec)
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000231 return endrec
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000232
233 # Unable to find a valid end of central directory structure
234 return
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000235
Fred Drake484d7352000-10-02 21:14:52 +0000236
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000237class ZipInfo (object):
Fred Drake484d7352000-10-02 21:14:52 +0000238 """Class with attributes describing each file in the ZIP archive."""
239
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000240 __slots__ = (
241 'orig_filename',
242 'filename',
243 'date_time',
244 'compress_type',
245 'comment',
246 'extra',
247 'create_system',
248 'create_version',
249 'extract_version',
250 'reserved',
251 'flag_bits',
252 'volume',
253 'internal_attr',
254 'external_attr',
255 'header_offset',
256 'CRC',
257 'compress_size',
258 'file_size',
Christian Heimesfdab48e2008-01-20 09:06:41 +0000259 '_raw_time',
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000260 )
261
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000262 def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
Greg Ward8e36d282003-06-18 00:53:06 +0000263 self.orig_filename = filename # Original file name in archive
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000264
265 # Terminate the file name at the first null byte. Null bytes in file
266 # names are used as tricks by viruses in archives.
Greg Ward8e36d282003-06-18 00:53:06 +0000267 null_byte = filename.find(chr(0))
268 if null_byte >= 0:
269 filename = filename[0:null_byte]
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000270 # This is used to ensure paths in generated ZIP files always use
271 # forward slashes as the directory separator, as required by the
272 # ZIP format specification.
273 if os.sep != "/" and os.sep in filename:
Greg Ward8e36d282003-06-18 00:53:06 +0000274 filename = filename.replace(os.sep, "/")
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000275
Greg Ward8e36d282003-06-18 00:53:06 +0000276 self.filename = filename # Normalized file name
Tim Peterse1190062001-01-15 03:34:38 +0000277 self.date_time = date_time # year, month, day, hour, min, sec
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000278 # Standard values:
Tim Peterse1190062001-01-15 03:34:38 +0000279 self.compress_type = ZIP_STORED # Type of compression for the file
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000280 self.comment = b"" # Comment for each file
281 self.extra = b"" # ZIP extra data
Martin v. Löwis00756902006-02-05 17:09:41 +0000282 if sys.platform == 'win32':
283 self.create_system = 0 # System which created ZIP archive
284 else:
285 # Assume everything else is unix-y
286 self.create_system = 3 # System which created ZIP archive
Tim Peterse1190062001-01-15 03:34:38 +0000287 self.create_version = 20 # Version which created ZIP archive
288 self.extract_version = 20 # Version needed to extract archive
289 self.reserved = 0 # Must be zero
290 self.flag_bits = 0 # ZIP flag bits
291 self.volume = 0 # Volume number of file header
292 self.internal_attr = 0 # Internal attributes
293 self.external_attr = 0 # External file attributes
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000294 # Other attributes are set by class ZipFile:
Tim Peterse1190062001-01-15 03:34:38 +0000295 # header_offset Byte offset to the file header
Tim Peterse1190062001-01-15 03:34:38 +0000296 # CRC CRC-32 of the uncompressed file
297 # compress_size Size of the compressed file
298 # file_size Size of the uncompressed file
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000299
300 def FileHeader(self):
Fred Drake484d7352000-10-02 21:14:52 +0000301 """Return the per-file header as a string."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000302 dt = self.date_time
303 dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
Tim Peters3caca232001-12-06 06:23:26 +0000304 dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000305 if self.flag_bits & 0x08:
Tim Peterse1190062001-01-15 03:34:38 +0000306 # Set these to zero because we write them after the file data
307 CRC = compress_size = file_size = 0
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000308 else:
Tim Peterse1190062001-01-15 03:34:38 +0000309 CRC = self.CRC
310 compress_size = self.compress_size
311 file_size = self.file_size
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000312
313 extra = self.extra
314
315 if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
316 # File is larger than what fits into a 4 byte integer,
317 # fall back to the ZIP64 extension
Christian Heimesd5e2b6f2008-03-19 21:50:51 +0000318 fmt = '<HHQQ'
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000319 extra = extra + struct.pack(fmt,
320 1, struct.calcsize(fmt)-4, file_size, compress_size)
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000321 file_size = 0xffffffff
322 compress_size = 0xffffffff
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000323 self.extract_version = max(45, self.extract_version)
324 self.create_version = max(45, self.extract_version)
325
Martin v. Löwis8570f6a2008-05-05 17:44:38 +0000326 filename, flag_bits = self._encodeFilenameFlags()
Georg Brandl2ee470f2008-07-16 12:55:28 +0000327 header = struct.pack(structFileHeader, stringFileHeader,
Martin v. Löwis8570f6a2008-05-05 17:44:38 +0000328 self.extract_version, self.reserved, flag_bits,
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000329 self.compress_type, dostime, dosdate, CRC,
330 compress_size, file_size,
Martin v. Löwis8570f6a2008-05-05 17:44:38 +0000331 len(filename), len(extra))
332 return header + filename + extra
333
334 def _encodeFilenameFlags(self):
335 try:
336 return self.filename.encode('ascii'), self.flag_bits
337 except UnicodeEncodeError:
338 return self.filename.encode('utf-8'), self.flag_bits | 0x800
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000339
340 def _decodeExtra(self):
341 # Try to decode the extra field.
342 extra = self.extra
343 unpack = struct.unpack
344 while extra:
Christian Heimesd5e2b6f2008-03-19 21:50:51 +0000345 tp, ln = unpack('<HH', extra[:4])
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000346 if tp == 1:
347 if ln >= 24:
Christian Heimesd5e2b6f2008-03-19 21:50:51 +0000348 counts = unpack('<QQQ', extra[4:28])
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000349 elif ln == 16:
Christian Heimesd5e2b6f2008-03-19 21:50:51 +0000350 counts = unpack('<QQ', extra[4:20])
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000351 elif ln == 8:
Christian Heimesd5e2b6f2008-03-19 21:50:51 +0000352 counts = unpack('<Q', extra[4:12])
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000353 elif ln == 0:
354 counts = ()
355 else:
Collin Winterce36ad82007-08-30 01:19:48 +0000356 raise RuntimeError("Corrupt extra field %s"%(ln,))
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000357
358 idx = 0
359
360 # ZIP64 extension (large files and/or large archives)
Christian Heimesd5e2b6f2008-03-19 21:50:51 +0000361 if self.file_size in (0xffffffffffffffff, 0xffffffff):
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000362 self.file_size = counts[idx]
363 idx += 1
364
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000365 if self.compress_size == 0xFFFFFFFF:
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000366 self.compress_size = counts[idx]
367 idx += 1
368
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000369 if self.header_offset == 0xffffffff:
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000370 old = self.header_offset
371 self.header_offset = counts[idx]
372 idx+=1
373
374 extra = extra[ln+4:]
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000375
376
Thomas Wouterscf297e42007-02-23 15:07:44 +0000377class _ZipDecrypter:
378 """Class to handle decryption of files stored within a ZIP archive.
379
380 ZIP supports a password-based form of encryption. Even though known
381 plaintext attacks have been found against it, it is still useful
Christian Heimesfdab48e2008-01-20 09:06:41 +0000382 to be able to get data out of such a file.
Thomas Wouterscf297e42007-02-23 15:07:44 +0000383
384 Usage:
385 zd = _ZipDecrypter(mypwd)
386 plain_char = zd(cypher_char)
387 plain_text = map(zd, cypher_text)
388 """
389
390 def _GenerateCRCTable():
391 """Generate a CRC-32 table.
392
393 ZIP encryption uses the CRC32 one-byte primitive for scrambling some
394 internal keys. We noticed that a direct implementation is faster than
395 relying on binascii.crc32().
396 """
397 poly = 0xedb88320
398 table = [0] * 256
399 for i in range(256):
400 crc = i
401 for j in range(8):
402 if crc & 1:
403 crc = ((crc >> 1) & 0x7FFFFFFF) ^ poly
404 else:
405 crc = ((crc >> 1) & 0x7FFFFFFF)
406 table[i] = crc
407 return table
408 crctable = _GenerateCRCTable()
409
410 def _crc32(self, ch, crc):
411 """Compute the CRC32 primitive on one byte."""
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000412 return ((crc >> 8) & 0xffffff) ^ self.crctable[(crc ^ ch) & 0xff]
Thomas Wouterscf297e42007-02-23 15:07:44 +0000413
414 def __init__(self, pwd):
415 self.key0 = 305419896
416 self.key1 = 591751049
417 self.key2 = 878082192
418 for p in pwd:
419 self._UpdateKeys(p)
420
421 def _UpdateKeys(self, c):
422 self.key0 = self._crc32(c, self.key0)
423 self.key1 = (self.key1 + (self.key0 & 255)) & 4294967295
424 self.key1 = (self.key1 * 134775813 + 1) & 4294967295
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000425 self.key2 = self._crc32((self.key1 >> 24) & 255, self.key2)
Thomas Wouterscf297e42007-02-23 15:07:44 +0000426
427 def __call__(self, c):
428 """Decrypt a single character."""
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000429 assert isinstance(c, int)
Thomas Wouterscf297e42007-02-23 15:07:44 +0000430 k = self.key2 | 2
431 c = c ^ (((k * (k^1)) >> 8) & 255)
Thomas Wouterscf297e42007-02-23 15:07:44 +0000432 self._UpdateKeys(c)
433 return c
434
Guido van Rossumd8faa362007-04-27 19:54:29 +0000435class ZipExtFile:
436 """File-like object for reading an archive member.
437 Is returned by ZipFile.open().
438 """
439
440 def __init__(self, fileobj, zipinfo, decrypt=None):
441 self.fileobj = fileobj
442 self.decrypter = decrypt
443 self.bytes_read = 0
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000444 self.rawbuffer = b''
445 self.readbuffer = b''
446 self.linebuffer = b''
Guido van Rossumd8faa362007-04-27 19:54:29 +0000447 self.eof = False
448 self.univ_newlines = False
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000449 self.nlSeps = (b"\n", )
450 self.lastdiscard = b''
Guido van Rossumd8faa362007-04-27 19:54:29 +0000451
452 self.compress_type = zipinfo.compress_type
453 self.compress_size = zipinfo.compress_size
454
455 self.closed = False
456 self.mode = "r"
457 self.name = zipinfo.filename
458
459 # read from compressed files in 64k blocks
460 self.compreadsize = 64*1024
461 if self.compress_type == ZIP_DEFLATED:
462 self.dc = zlib.decompressobj(-15)
463
464 def set_univ_newlines(self, univ_newlines):
465 self.univ_newlines = univ_newlines
466
467 # pick line separator char(s) based on universal newlines flag
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000468 self.nlSeps = (b"\n", )
Guido van Rossumd8faa362007-04-27 19:54:29 +0000469 if self.univ_newlines:
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000470 self.nlSeps = (b"\r\n", b"\r", b"\n")
Guido van Rossumd8faa362007-04-27 19:54:29 +0000471
472 def __iter__(self):
473 return self
474
475 def __next__(self):
476 nextline = self.readline()
477 if not nextline:
478 raise StopIteration()
479
480 return nextline
481
482 def close(self):
483 self.closed = True
484
485 def _checkfornewline(self):
486 nl, nllen = -1, -1
487 if self.linebuffer:
488 # ugly check for cases where half of an \r\n pair was
489 # read on the last pass, and the \r was discarded. In this
490 # case we just throw away the \n at the start of the buffer.
Guido van Rossum814661e2007-07-18 22:07:29 +0000491 if (self.lastdiscard, self.linebuffer[:1]) == (b'\r', b'\n'):
Guido van Rossumd8faa362007-04-27 19:54:29 +0000492 self.linebuffer = self.linebuffer[1:]
493
494 for sep in self.nlSeps:
495 nl = self.linebuffer.find(sep)
496 if nl >= 0:
497 nllen = len(sep)
498 return nl, nllen
499
500 return nl, nllen
501
502 def readline(self, size = -1):
503 """Read a line with approx. size. If size is negative,
504 read a whole line.
505 """
506 if size < 0:
Christian Heimesa37d4c62007-12-04 23:02:19 +0000507 size = sys.maxsize
Guido van Rossumd8faa362007-04-27 19:54:29 +0000508 elif size == 0:
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000509 return b''
Guido van Rossumd8faa362007-04-27 19:54:29 +0000510
511 # check for a newline already in buffer
512 nl, nllen = self._checkfornewline()
513
514 if nl >= 0:
515 # the next line was already in the buffer
516 nl = min(nl, size)
517 else:
518 # no line break in buffer - try to read more
519 size -= len(self.linebuffer)
520 while nl < 0 and size > 0:
521 buf = self.read(min(size, 100))
522 if not buf:
523 break
524 self.linebuffer += buf
525 size -= len(buf)
526
527 # check for a newline in buffer
528 nl, nllen = self._checkfornewline()
529
530 # we either ran out of bytes in the file, or
531 # met the specified size limit without finding a newline,
532 # so return current buffer
533 if nl < 0:
534 s = self.linebuffer
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000535 self.linebuffer = b''
Guido van Rossumd8faa362007-04-27 19:54:29 +0000536 return s
537
538 buf = self.linebuffer[:nl]
539 self.lastdiscard = self.linebuffer[nl:nl + nllen]
540 self.linebuffer = self.linebuffer[nl + nllen:]
541
542 # line is always returned with \n as newline char (except possibly
543 # for a final incomplete line in the file, which is handled above).
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000544 return buf + b"\n"
Guido van Rossumd8faa362007-04-27 19:54:29 +0000545
546 def readlines(self, sizehint = -1):
547 """Return a list with all (following) lines. The sizehint parameter
548 is ignored in this implementation.
549 """
550 result = []
551 while True:
552 line = self.readline()
553 if not line: break
554 result.append(line)
555 return result
556
557 def read(self, size = None):
Guido van Rossum814661e2007-07-18 22:07:29 +0000558 # act like file obj and return empty string if size is 0
Guido van Rossumd8faa362007-04-27 19:54:29 +0000559 if size == 0:
Guido van Rossum814661e2007-07-18 22:07:29 +0000560 return b''
Guido van Rossumd8faa362007-04-27 19:54:29 +0000561
562 # determine read size
563 bytesToRead = self.compress_size - self.bytes_read
564
565 # adjust read size for encrypted files since the first 12 bytes
566 # are for the encryption/password information
567 if self.decrypter is not None:
568 bytesToRead -= 12
569
570 if size is not None and size >= 0:
571 if self.compress_type == ZIP_STORED:
572 lr = len(self.readbuffer)
573 bytesToRead = min(bytesToRead, size - lr)
574 elif self.compress_type == ZIP_DEFLATED:
575 if len(self.readbuffer) > size:
576 # the user has requested fewer bytes than we've already
577 # pulled through the decompressor; don't read any more
578 bytesToRead = 0
579 else:
580 # user will use up the buffer, so read some more
581 lr = len(self.rawbuffer)
582 bytesToRead = min(bytesToRead, self.compreadsize - lr)
583
584 # avoid reading past end of file contents
585 if bytesToRead + self.bytes_read > self.compress_size:
586 bytesToRead = self.compress_size - self.bytes_read
587
588 # try to read from file (if necessary)
589 if bytesToRead > 0:
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000590 data = self.fileobj.read(bytesToRead)
591 self.bytes_read += len(data)
592 try:
593 self.rawbuffer += data
594 except:
595 print(repr(self.fileobj), repr(self.rawbuffer),
596 repr(data))
597 raise
Guido van Rossumd8faa362007-04-27 19:54:29 +0000598
599 # handle contents of raw buffer
600 if self.rawbuffer:
601 newdata = self.rawbuffer
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000602 self.rawbuffer = b''
Guido van Rossumd8faa362007-04-27 19:54:29 +0000603
604 # decrypt new data if we were given an object to handle that
605 if newdata and self.decrypter is not None:
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000606 newdata = bytes(map(self.decrypter, newdata))
Guido van Rossumd8faa362007-04-27 19:54:29 +0000607
608 # decompress newly read data if necessary
609 if newdata and self.compress_type == ZIP_DEFLATED:
610 newdata = self.dc.decompress(newdata)
611 self.rawbuffer = self.dc.unconsumed_tail
612 if self.eof and len(self.rawbuffer) == 0:
613 # we're out of raw bytes (both from the file and
614 # the local buffer); flush just to make sure the
615 # decompressor is done
616 newdata += self.dc.flush()
617 # prevent decompressor from being used again
618 self.dc = None
619
620 self.readbuffer += newdata
621
622
623 # return what the user asked for
624 if size is None or len(self.readbuffer) <= size:
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000625 data = self.readbuffer
626 self.readbuffer = b''
Guido van Rossumd8faa362007-04-27 19:54:29 +0000627 else:
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000628 data = self.readbuffer[:size]
Guido van Rossumd8faa362007-04-27 19:54:29 +0000629 self.readbuffer = self.readbuffer[size:]
630
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000631 return data
Guido van Rossumd8faa362007-04-27 19:54:29 +0000632
633
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000634class ZipFile:
Tim Petersa19a1682001-03-29 04:36:09 +0000635 """ Class with methods to open, read, write, close, list zip files.
636
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000637 z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=False)
Tim Petersa19a1682001-03-29 04:36:09 +0000638
Fred Drake3d9091e2001-03-26 15:49:24 +0000639 file: Either the path to the file, or a file-like object.
640 If it is a path, the file will be opened and closed by ZipFile.
641 mode: The mode can be either read "r", write "w" or append "a".
642 compression: ZIP_STORED (no compression) or ZIP_DEFLATED (requires zlib).
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000643 allowZip64: if True ZipFile will create files with ZIP64 extensions when
644 needed, otherwise it will raise an exception when this would
645 be necessary.
646
Fred Drake3d9091e2001-03-26 15:49:24 +0000647 """
Fred Drake484d7352000-10-02 21:14:52 +0000648
Fred Drake90eac282001-02-28 05:29:34 +0000649 fp = None # Set here since __del__ checks it
650
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000651 def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=False):
Fred Drake484d7352000-10-02 21:14:52 +0000652 """Open the ZIP file with mode read "r", write "w" or append "a"."""
Guido van Rossumb5a755e2007-07-18 18:15:48 +0000653 if mode not in ("r", "w", "a"):
654 raise RuntimeError('ZipFile() requires mode "r", "w", or "a"')
655
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000656 if compression == ZIP_STORED:
657 pass
658 elif compression == ZIP_DEFLATED:
659 if not zlib:
Collin Winterce36ad82007-08-30 01:19:48 +0000660 raise RuntimeError(
661 "Compression requires the (missing) zlib module")
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000662 else:
Collin Winterce36ad82007-08-30 01:19:48 +0000663 raise RuntimeError("That compression method is not supported")
Guido van Rossumb5a755e2007-07-18 18:15:48 +0000664
665 self._allowZip64 = allowZip64
666 self._didModify = False
Tim Peterse1190062001-01-15 03:34:38 +0000667 self.debug = 0 # Level of printing: 0 through 3
668 self.NameToInfo = {} # Find file info given name
669 self.filelist = [] # List of ZipInfo instances for archive
670 self.compression = compression # Method of compression
Raymond Hettinger2ca7c192005-02-16 09:27:49 +0000671 self.mode = key = mode.replace('b', '')[0]
Thomas Wouterscf297e42007-02-23 15:07:44 +0000672 self.pwd = None
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000673 self.comment = b''
Tim Petersa19a1682001-03-29 04:36:09 +0000674
Fred Drake3d9091e2001-03-26 15:49:24 +0000675 # Check if we were passed a file-like object
Guido van Rossum3172c5d2007-10-16 18:12:55 +0000676 if isinstance(file, str):
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000677 # No, it's a filename
Fred Drake3d9091e2001-03-26 15:49:24 +0000678 self._filePassed = 0
679 self.filename = file
680 modeDict = {'r' : 'rb', 'w': 'wb', 'a' : 'r+b'}
Thomas Wouterscf297e42007-02-23 15:07:44 +0000681 try:
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000682 self.fp = io.open(file, modeDict[mode])
Thomas Wouterscf297e42007-02-23 15:07:44 +0000683 except IOError:
684 if mode == 'a':
685 mode = key = 'w'
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000686 self.fp = io.open(file, modeDict[mode])
Thomas Wouterscf297e42007-02-23 15:07:44 +0000687 else:
688 raise
Fred Drake3d9091e2001-03-26 15:49:24 +0000689 else:
690 self._filePassed = 1
691 self.fp = file
692 self.filename = getattr(file, 'name', None)
Tim Petersa19a1682001-03-29 04:36:09 +0000693
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000694 if key == 'r':
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000695 self._GetContents()
696 elif key == 'w':
Fred Drake3d9091e2001-03-26 15:49:24 +0000697 pass
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000698 elif key == 'a':
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000699 try: # See if file is a zip file
700 self._RealGetContents()
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000701 # seek to start of directory and overwrite
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000702 self.fp.seek(self.start_dir, 0)
703 except BadZipfile: # file is not a zip file, just append
704 self.fp.seek(0, 2)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000705 else:
Tim Peters7d3bad62001-04-04 18:56:49 +0000706 if not self._filePassed:
707 self.fp.close()
708 self.fp = None
Collin Winterce36ad82007-08-30 01:19:48 +0000709 raise RuntimeError('Mode must be "r", "w" or "a"')
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000710
711 def _GetContents(self):
Tim Peters7d3bad62001-04-04 18:56:49 +0000712 """Read the directory, making sure we close the file if the format
713 is bad."""
714 try:
715 self._RealGetContents()
716 except BadZipfile:
717 if not self._filePassed:
718 self.fp.close()
719 self.fp = None
720 raise
721
722 def _RealGetContents(self):
Fred Drake484d7352000-10-02 21:14:52 +0000723 """Read in the table of contents for the ZIP file."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000724 fp = self.fp
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000725 endrec = _EndRecData(fp)
726 if not endrec:
Collin Winterce36ad82007-08-30 01:19:48 +0000727 raise BadZipfile("File is not a zip file")
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000728 if self.debug > 1:
Guido van Rossumbe19ed72007-02-09 05:37:30 +0000729 print(endrec)
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000730 size_cd = endrec[_ECD_SIZE] # bytes in central directory
731 offset_cd = endrec[_ECD_OFFSET] # offset of central directory
732 self.comment = endrec[_ECD_COMMENT] # archive comment
733
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000734 # "concat" is zero, unless zip was concatenated to another file
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000735 concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
736 if endrec[_ECD_LOCATION] > ZIP64_LIMIT:
737 # If the offset of the "End of Central Dir" record requires Zip64
738 # extension structures, account for them
739 concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)
740
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000741 if self.debug > 2:
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000742 inferred = concat + offset_cd
743 print("given, inferred, offset", offset_cd, inferred, concat)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000744 # self.start_dir: Position of start of central directory
745 self.start_dir = offset_cd + concat
746 fp.seek(self.start_dir, 0)
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000747 data = fp.read(size_cd)
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000748 fp = io.BytesIO(data)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000749 total = 0
750 while total < size_cd:
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000751 centdir = fp.read(sizeCentralDir)
Georg Brandl2ee470f2008-07-16 12:55:28 +0000752 if centdir[0:4] != stringCentralDir:
Collin Winterce36ad82007-08-30 01:19:48 +0000753 raise BadZipfile("Bad magic number for central directory")
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000754 centdir = struct.unpack(structCentralDir, centdir)
755 if self.debug > 2:
Guido van Rossumbe19ed72007-02-09 05:37:30 +0000756 print(centdir)
Fred Drake3e038e52001-02-28 17:56:26 +0000757 filename = fp.read(centdir[_CD_FILENAME_LENGTH])
Martin v. Löwis8570f6a2008-05-05 17:44:38 +0000758 flags = centdir[5]
759 if flags & 0x800:
760 # UTF-8 file names extension
761 filename = filename.decode('utf-8')
762 else:
763 # Historical ZIP filename encoding
764 filename = filename.decode('cp437')
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000765 # Create ZipInfo instance to store file information
Martin v. Löwis8570f6a2008-05-05 17:44:38 +0000766 x = ZipInfo(filename)
Fred Drake3e038e52001-02-28 17:56:26 +0000767 x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
768 x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000769 x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000770 (x.create_version, x.create_system, x.extract_version, x.reserved,
771 x.flag_bits, x.compress_type, t, d,
772 x.CRC, x.compress_size, x.file_size) = centdir[1:12]
773 x.volume, x.internal_attr, x.external_attr = centdir[15:18]
774 # Convert date/time code to (year, month, day, hour, min, sec)
Christian Heimesfdab48e2008-01-20 09:06:41 +0000775 x._raw_time = t
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000776 x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
Fred Drake414ca662000-06-13 18:49:53 +0000777 t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000778
779 x._decodeExtra()
780 x.header_offset = x.header_offset + concat
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000781 self.filelist.append(x)
782 self.NameToInfo[x.filename] = x
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000783
784 # update total bytes read from central directory
785 total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH]
786 + centdir[_CD_EXTRA_FIELD_LENGTH]
787 + centdir[_CD_COMMENT_LENGTH])
788
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000789 if self.debug > 2:
Guido van Rossumbe19ed72007-02-09 05:37:30 +0000790 print("total", total)
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000791
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000792
793 def namelist(self):
Fred Drake484d7352000-10-02 21:14:52 +0000794 """Return a list of file names in the archive."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000795 l = []
796 for data in self.filelist:
797 l.append(data.filename)
798 return l
799
800 def infolist(self):
Fred Drake484d7352000-10-02 21:14:52 +0000801 """Return a list of class ZipInfo instances for files in the
802 archive."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000803 return self.filelist
804
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000805 def printdir(self, file=None):
Fred Drake484d7352000-10-02 21:14:52 +0000806 """Print a table of contents for the zip file."""
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000807 print("%-46s %19s %12s" % ("File Name", "Modified ", "Size"),
808 file=file)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000809 for zinfo in self.filelist:
Guido van Rossum7736b5b2008-01-15 21:44:53 +0000810 date = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time[:6]
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000811 print("%-46s %s %12d" % (zinfo.filename, date, zinfo.file_size),
812 file=file)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000813
814 def testzip(self):
Fred Drake484d7352000-10-02 21:14:52 +0000815 """Read all the files and check the CRC."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000816 for zinfo in self.filelist:
817 try:
Tim Peterse1190062001-01-15 03:34:38 +0000818 self.read(zinfo.filename) # Check CRC-32
Raymond Hettingerc0fac962003-06-27 22:25:03 +0000819 except BadZipfile:
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000820 return zinfo.filename
821
822 def getinfo(self, name):
Fred Drake484d7352000-10-02 21:14:52 +0000823 """Return the instance of ZipInfo given 'name'."""
Guido van Rossumb5a755e2007-07-18 18:15:48 +0000824 info = self.NameToInfo.get(name)
825 if info is None:
826 raise KeyError(
827 'There is no item named %r in the archive' % name)
828
829 return info
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000830
Thomas Wouterscf297e42007-02-23 15:07:44 +0000831 def setpassword(self, pwd):
832 """Set default password for encrypted files."""
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000833 assert isinstance(pwd, bytes)
Thomas Wouterscf297e42007-02-23 15:07:44 +0000834 self.pwd = pwd
835
836 def read(self, name, pwd=None):
Fred Drake484d7352000-10-02 21:14:52 +0000837 """Return file bytes (as a string) for name."""
Guido van Rossumd8faa362007-04-27 19:54:29 +0000838 return self.open(name, "r", pwd).read()
839
840 def open(self, name, mode="r", pwd=None):
841 """Return file-like object for 'name'."""
842 if mode not in ("r", "U", "rU"):
Collin Winterce36ad82007-08-30 01:19:48 +0000843 raise RuntimeError('open() requires mode "r", "U", or "rU"')
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000844 if not self.fp:
Collin Winterce36ad82007-08-30 01:19:48 +0000845 raise RuntimeError(
846 "Attempt to read ZIP archive that was already closed")
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000847
Guido van Rossumd8faa362007-04-27 19:54:29 +0000848 # Only open a new file for instances where we were not
849 # given a file object in the constructor
850 if self._filePassed:
851 zef_file = self.fp
852 else:
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000853 zef_file = io.open(self.filename, 'rb')
Guido van Rossumd8faa362007-04-27 19:54:29 +0000854
Georg Brandlb533e262008-05-25 18:19:30 +0000855 # Make sure we have an info object
856 if isinstance(name, ZipInfo):
857 # 'name' is already an info object
858 zinfo = name
859 else:
860 # Get info object for name
861 zinfo = self.getinfo(name)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000862
863 zef_file.seek(zinfo.header_offset, 0)
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000864
865 # Skip the file header:
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000866 fheader = zef_file.read(sizeFileHeader)
Georg Brandl2ee470f2008-07-16 12:55:28 +0000867 if fheader[0:4] != stringFileHeader:
Collin Winterce36ad82007-08-30 01:19:48 +0000868 raise BadZipfile("Bad magic number for file header")
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000869
870 fheader = struct.unpack(structFileHeader, fheader)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000871 fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000872 if fheader[_FH_EXTRA_FIELD_LENGTH]:
Guido van Rossumd8faa362007-04-27 19:54:29 +0000873 zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000874
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000875 if fname != zinfo.orig_filename.encode("utf-8"):
Collin Winterce36ad82007-08-30 01:19:48 +0000876 raise BadZipfile(
877 'File name in directory %r and header %r differ.'
878 % (zinfo.orig_filename, fname))
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000879
Guido van Rossumd8faa362007-04-27 19:54:29 +0000880 # check for encrypted flag & handle password
881 is_encrypted = zinfo.flag_bits & 0x1
882 zd = None
Thomas Wouterscf297e42007-02-23 15:07:44 +0000883 if is_encrypted:
Guido van Rossumd8faa362007-04-27 19:54:29 +0000884 if not pwd:
885 pwd = self.pwd
886 if not pwd:
Collin Winterce36ad82007-08-30 01:19:48 +0000887 raise RuntimeError("File %s is encrypted, "
888 "password required for extraction" % name)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000889
Thomas Wouterscf297e42007-02-23 15:07:44 +0000890 zd = _ZipDecrypter(pwd)
891 # The first 12 bytes in the cypher stream is an encryption header
892 # used to strengthen the algorithm. The first 11 bytes are
893 # completely random, while the 12th contains the MSB of the CRC,
Christian Heimesfdab48e2008-01-20 09:06:41 +0000894 # or the MSB of the file time depending on the header type
Thomas Wouterscf297e42007-02-23 15:07:44 +0000895 # and is used to check the correctness of the password.
Guido van Rossumd8faa362007-04-27 19:54:29 +0000896 bytes = zef_file.read(12)
Guido van Rossumc1f779c2007-07-03 08:25:58 +0000897 h = list(map(zd, bytes[0:12]))
Christian Heimesfdab48e2008-01-20 09:06:41 +0000898 if zinfo.flag_bits & 0x8:
899 # compare against the file type from extended local headers
900 check_byte = (zinfo._raw_time >> 8) & 0xff
901 else:
902 # compare against the CRC otherwise
903 check_byte = (zinfo.CRC >> 24) & 0xff
904 if h[11] != check_byte:
905 raise RuntimeError("Bad password for file", name)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000906
907 # build and return a ZipExtFile
908 if zd is None:
909 zef = ZipExtFile(zef_file, zinfo)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000910 else:
Guido van Rossumd8faa362007-04-27 19:54:29 +0000911 zef = ZipExtFile(zef_file, zinfo, zd)
912
913 # set universal newlines on ZipExtFile if necessary
914 if "U" in mode:
915 zef.set_univ_newlines(True)
916 return zef
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000917
Christian Heimes790c8232008-01-07 21:14:23 +0000918 def extract(self, member, path=None, pwd=None):
919 """Extract a member from the archive to the current working directory,
920 using its full name. Its file information is extracted as accurately
921 as possible. `member' may be a filename or a ZipInfo object. You can
922 specify a different directory using `path'.
923 """
924 if not isinstance(member, ZipInfo):
925 member = self.getinfo(member)
926
927 if path is None:
928 path = os.getcwd()
929
930 return self._extract_member(member, path, pwd)
931
932 def extractall(self, path=None, members=None, pwd=None):
933 """Extract all members from the archive to the current working
934 directory. `path' specifies a different directory to extract to.
935 `members' is optional and must be a subset of the list returned
936 by namelist().
937 """
938 if members is None:
939 members = self.namelist()
940
941 for zipinfo in members:
942 self.extract(zipinfo, path, pwd)
943
944 def _extract_member(self, member, targetpath, pwd):
945 """Extract the ZipInfo object 'member' to a physical
946 file on the path targetpath.
947 """
948 # build the destination pathname, replacing
949 # forward slashes to platform specific separators.
950 if targetpath[-1:] == "/":
951 targetpath = targetpath[:-1]
952
953 # don't include leading "/" from file name if present
954 if os.path.isabs(member.filename):
955 targetpath = os.path.join(targetpath, member.filename[1:])
956 else:
957 targetpath = os.path.join(targetpath, member.filename)
958
959 targetpath = os.path.normpath(targetpath)
960
961 # Create all upper directories if necessary.
962 upperdirs = os.path.dirname(targetpath)
963 if upperdirs and not os.path.exists(upperdirs):
964 os.makedirs(upperdirs)
965
Georg Brandlb533e262008-05-25 18:19:30 +0000966 source = self.open(member, pwd=pwd)
Christian Heimes790c8232008-01-07 21:14:23 +0000967 target = open(targetpath, "wb")
968 shutil.copyfileobj(source, target)
969 source.close()
970 target.close()
971
972 return targetpath
973
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000974 def _writecheck(self, zinfo):
Fred Drake484d7352000-10-02 21:14:52 +0000975 """Check for errors before writing a file to the archive."""
Raymond Hettinger54f02222002-06-01 14:18:47 +0000976 if zinfo.filename in self.NameToInfo:
Tim Peterse1190062001-01-15 03:34:38 +0000977 if self.debug: # Warning for duplicate names
Guido van Rossumbe19ed72007-02-09 05:37:30 +0000978 print("Duplicate name:", zinfo.filename)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000979 if self.mode not in ("w", "a"):
Collin Winterce36ad82007-08-30 01:19:48 +0000980 raise RuntimeError('write() requires mode "w" or "a"')
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000981 if not self.fp:
Collin Winterce36ad82007-08-30 01:19:48 +0000982 raise RuntimeError(
983 "Attempt to write ZIP archive that was already closed")
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000984 if zinfo.compress_type == ZIP_DEFLATED and not zlib:
Collin Winterce36ad82007-08-30 01:19:48 +0000985 raise RuntimeError(
986 "Compression requires the (missing) zlib module")
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000987 if zinfo.compress_type not in (ZIP_STORED, ZIP_DEFLATED):
Collin Winterce36ad82007-08-30 01:19:48 +0000988 raise RuntimeError("That compression method is not supported")
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000989 if zinfo.file_size > ZIP64_LIMIT:
990 if not self._allowZip64:
991 raise LargeZipFile("Filesize would require ZIP64 extensions")
992 if zinfo.header_offset > ZIP64_LIMIT:
993 if not self._allowZip64:
Collin Winterce36ad82007-08-30 01:19:48 +0000994 raise LargeZipFile(
995 "Zipfile size would require ZIP64 extensions")
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000996
997 def write(self, filename, arcname=None, compress_type=None):
Fred Drake484d7352000-10-02 21:14:52 +0000998 """Put the bytes from filename into the archive under the name
999 arcname."""
Guido van Rossumb5a755e2007-07-18 18:15:48 +00001000 if not self.fp:
1001 raise RuntimeError(
1002 "Attempt to write to ZIP archive that was already closed")
1003
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001004 st = os.stat(filename)
Raymond Hettinger32200ae2002-06-01 19:51:15 +00001005 mtime = time.localtime(st.st_mtime)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001006 date_time = mtime[0:6]
1007 # Create ZipInfo instance to store file information
1008 if arcname is None:
Georg Brandl8f7c54e2006-02-20 08:40:38 +00001009 arcname = filename
1010 arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
1011 while arcname[0] in (os.sep, os.altsep):
1012 arcname = arcname[1:]
1013 zinfo = ZipInfo(arcname, date_time)
Guido van Rossume2a383d2007-01-15 16:59:06 +00001014 zinfo.external_attr = (st[0] & 0xFFFF) << 16 # Unix attributes
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001015 if compress_type is None:
Tim Peterse1190062001-01-15 03:34:38 +00001016 zinfo.compress_type = self.compression
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001017 else:
Tim Peterse1190062001-01-15 03:34:38 +00001018 zinfo.compress_type = compress_type
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001019
1020 zinfo.file_size = st.st_size
Finn Bock03a3bb82001-09-05 18:40:33 +00001021 zinfo.flag_bits = 0x00
Tim Peterse1190062001-01-15 03:34:38 +00001022 zinfo.header_offset = self.fp.tell() # Start of header bytes
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001023
1024 self._writecheck(zinfo)
1025 self._didModify = True
Guido van Rossumd6ca5462007-05-22 01:29:33 +00001026 fp = io.open(filename, "rb")
Finn Bock03a3bb82001-09-05 18:40:33 +00001027 # Must overwrite CRC and sizes with correct data later
1028 zinfo.CRC = CRC = 0
1029 zinfo.compress_size = compress_size = 0
1030 zinfo.file_size = file_size = 0
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001031 self.fp.write(zinfo.FileHeader())
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001032 if zinfo.compress_type == ZIP_DEFLATED:
1033 cmpr = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
1034 zlib.DEFLATED, -15)
1035 else:
1036 cmpr = None
1037 while 1:
1038 buf = fp.read(1024 * 8)
1039 if not buf:
1040 break
1041 file_size = file_size + len(buf)
Christian Heimesd5e2b6f2008-03-19 21:50:51 +00001042 CRC = crc32(buf, CRC) & 0xffffffff
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001043 if cmpr:
1044 buf = cmpr.compress(buf)
1045 compress_size = compress_size + len(buf)
1046 self.fp.write(buf)
1047 fp.close()
1048 if cmpr:
1049 buf = cmpr.flush()
1050 compress_size = compress_size + len(buf)
1051 self.fp.write(buf)
1052 zinfo.compress_size = compress_size
1053 else:
1054 zinfo.compress_size = file_size
1055 zinfo.CRC = CRC
1056 zinfo.file_size = file_size
Finn Bock03a3bb82001-09-05 18:40:33 +00001057 # Seek backwards and write CRC and file sizes
Tim Petersb64bec32001-09-18 02:26:39 +00001058 position = self.fp.tell() # Preserve current position in file
Finn Bock03a3bb82001-09-05 18:40:33 +00001059 self.fp.seek(zinfo.header_offset + 14, 0)
Christian Heimesd5e2b6f2008-03-19 21:50:51 +00001060 self.fp.write(struct.pack("<LLL", zinfo.CRC, zinfo.compress_size,
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001061 zinfo.file_size))
Finn Bock03a3bb82001-09-05 18:40:33 +00001062 self.fp.seek(position, 0)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001063 self.filelist.append(zinfo)
1064 self.NameToInfo[zinfo.filename] = zinfo
1065
Guido van Rossum85825dc2007-08-27 17:03:28 +00001066 def writestr(self, zinfo_or_arcname, data):
1067 """Write a file into the archive. The contents is 'data', which
1068 may be either a 'str' or a 'bytes' instance; if it is a 'str',
1069 it is encoded as UTF-8 first.
1070 'zinfo_or_arcname' is either a ZipInfo instance or
Just van Rossumb083cb32002-12-12 12:23:32 +00001071 the name of the file in the archive."""
Guido van Rossum85825dc2007-08-27 17:03:28 +00001072 if isinstance(data, str):
1073 data = data.encode("utf-8")
Just van Rossumb083cb32002-12-12 12:23:32 +00001074 if not isinstance(zinfo_or_arcname, ZipInfo):
1075 zinfo = ZipInfo(filename=zinfo_or_arcname,
Guido van Rossum7736b5b2008-01-15 21:44:53 +00001076 date_time=time.localtime(time.time())[:6])
Just van Rossumb083cb32002-12-12 12:23:32 +00001077 zinfo.compress_type = self.compression
1078 else:
1079 zinfo = zinfo_or_arcname
Guido van Rossumb5a755e2007-07-18 18:15:48 +00001080
1081 if not self.fp:
1082 raise RuntimeError(
1083 "Attempt to write to ZIP archive that was already closed")
1084
Guido van Rossum85825dc2007-08-27 17:03:28 +00001085 zinfo.file_size = len(data) # Uncompressed size
1086 zinfo.header_offset = self.fp.tell() # Start of header data
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001087 self._writecheck(zinfo)
1088 self._didModify = True
Christian Heimesd5e2b6f2008-03-19 21:50:51 +00001089 zinfo.CRC = crc32(data) & 0xffffffff # CRC-32 checksum
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001090 if zinfo.compress_type == ZIP_DEFLATED:
1091 co = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
1092 zlib.DEFLATED, -15)
Guido van Rossum85825dc2007-08-27 17:03:28 +00001093 data = co.compress(data) + co.flush()
1094 zinfo.compress_size = len(data) # Compressed size
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001095 else:
1096 zinfo.compress_size = zinfo.file_size
Guido van Rossum85825dc2007-08-27 17:03:28 +00001097 zinfo.header_offset = self.fp.tell() # Start of header data
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001098 self.fp.write(zinfo.FileHeader())
Guido van Rossum85825dc2007-08-27 17:03:28 +00001099 self.fp.write(data)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001100 self.fp.flush()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001101 if zinfo.flag_bits & 0x08:
Tim Peterse1190062001-01-15 03:34:38 +00001102 # Write CRC and file sizes after the file data
Brett Cannonff450f72004-07-10 19:09:20 +00001103 self.fp.write(struct.pack("<lLL", zinfo.CRC, zinfo.compress_size,
Tim Peterse1190062001-01-15 03:34:38 +00001104 zinfo.file_size))
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001105 self.filelist.append(zinfo)
1106 self.NameToInfo[zinfo.filename] = zinfo
1107
1108 def __del__(self):
Fred Drake484d7352000-10-02 21:14:52 +00001109 """Call the "close()" method in case the user forgot."""
Tim Petersd15f8bb2001-11-28 23:16:40 +00001110 self.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001111
1112 def close(self):
Fred Drake484d7352000-10-02 21:14:52 +00001113 """Close the file, and for mode "w" and "a" write the ending
1114 records."""
Tim Petersd15f8bb2001-11-28 23:16:40 +00001115 if self.fp is None:
1116 return
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001117
1118 if self.mode in ("w", "a") and self._didModify: # write ending records
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001119 count = 0
1120 pos1 = self.fp.tell()
Tim Peterse1190062001-01-15 03:34:38 +00001121 for zinfo in self.filelist: # write central directory
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001122 count = count + 1
1123 dt = zinfo.date_time
1124 dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
Tim Peters3caca232001-12-06 06:23:26 +00001125 dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001126 extra = []
1127 if zinfo.file_size > ZIP64_LIMIT \
1128 or zinfo.compress_size > ZIP64_LIMIT:
1129 extra.append(zinfo.file_size)
1130 extra.append(zinfo.compress_size)
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001131 file_size = 0xffffffff
1132 compress_size = 0xffffffff
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001133 else:
1134 file_size = zinfo.file_size
1135 compress_size = zinfo.compress_size
1136
1137 if zinfo.header_offset > ZIP64_LIMIT:
1138 extra.append(zinfo.header_offset)
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001139 header_offset = 0xffffffff
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001140 else:
1141 header_offset = zinfo.header_offset
1142
1143 extra_data = zinfo.extra
1144 if extra:
1145 # Append a ZIP64 field to the extra's
1146 extra_data = struct.pack(
Christian Heimesd5e2b6f2008-03-19 21:50:51 +00001147 '<HH' + 'Q'*len(extra),
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001148 1, 8*len(extra), *extra) + extra_data
1149
1150 extract_version = max(45, zinfo.extract_version)
1151 create_version = max(45, zinfo.create_version)
1152 else:
1153 extract_version = zinfo.extract_version
1154 create_version = zinfo.create_version
1155
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001156 try:
1157 filename, flag_bits = zinfo._encodeFilenameFlags()
1158 centdir = struct.pack(structCentralDir,
Georg Brandl2ee470f2008-07-16 12:55:28 +00001159 stringCentralDir, create_version,
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001160 zinfo.create_system, extract_version, zinfo.reserved,
1161 flag_bits, zinfo.compress_type, dostime, dosdate,
1162 zinfo.CRC, compress_size, file_size,
1163 len(filename), len(extra_data), len(zinfo.comment),
1164 0, zinfo.internal_attr, zinfo.external_attr,
1165 header_offset)
1166 except DeprecationWarning:
1167 print >>sys.stderr, (structCentralDir,
1168 stringCentralDir, create_version,
1169 zinfo.create_system, extract_version, zinfo.reserved,
1170 zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
1171 zinfo.CRC, compress_size, file_size,
1172 len(zinfo.filename), len(extra_data), len(zinfo.comment),
1173 0, zinfo.internal_attr, zinfo.external_attr,
1174 header_offset)
1175 raise
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001176 self.fp.write(centdir)
Martin v. Löwis8570f6a2008-05-05 17:44:38 +00001177 self.fp.write(filename)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001178 self.fp.write(extra_data)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001179 self.fp.write(zinfo.comment)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001180
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001181 pos2 = self.fp.tell()
1182 # Write end-of-zip-archive record
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001183 centDirOffset = pos1
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001184 if pos1 > ZIP64_LIMIT:
1185 # Need to write the ZIP64 end-of-archive records
1186 zip64endrec = struct.pack(
Georg Brandl2ee470f2008-07-16 12:55:28 +00001187 structEndArchive64, stringEndArchive64,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001188 44, 45, 45, 0, 0, count, count, pos2 - pos1, pos1)
1189 self.fp.write(zip64endrec)
1190
1191 zip64locrec = struct.pack(
Georg Brandl2ee470f2008-07-16 12:55:28 +00001192 structEndArchive64Locator,
1193 stringEndArchive64Locator, 0, pos2, 1)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001194 self.fp.write(zip64locrec)
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001195 centDirOffset = 0xFFFFFFFF
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001196
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001197 # check for valid comment length
1198 if len(self.comment) >= ZIP_MAX_COMMENT:
1199 if self.debug > 0:
1200 msg = 'Archive comment is too long; truncating to %d bytes' \
1201 % ZIP_MAX_COMMENT
1202 self.comment = self.comment[:ZIP_MAX_COMMENT]
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001203
Georg Brandl2ee470f2008-07-16 12:55:28 +00001204 endrec = struct.pack(structEndArchive, stringEndArchive,
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001205 0, 0, count % ZIP_FILECOUNT_LIMIT,
1206 count % ZIP_FILECOUNT_LIMIT, pos2 - pos1,
1207 centDirOffset, len(self.comment))
1208 self.fp.write(endrec)
1209 self.fp.write(self.comment)
Guido van Rossumf85af612001-04-14 16:45:14 +00001210 self.fp.flush()
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001211
Fred Drake3d9091e2001-03-26 15:49:24 +00001212 if not self._filePassed:
1213 self.fp.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001214 self.fp = None
1215
1216
1217class PyZipFile(ZipFile):
Fred Drake484d7352000-10-02 21:14:52 +00001218 """Class to create ZIP archives with Python library files and packages."""
1219
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001220 def writepy(self, pathname, basename = ""):
1221 """Add all files from "pathname" to the ZIP archive.
1222
Fred Drake484d7352000-10-02 21:14:52 +00001223 If pathname is a package directory, search the directory and
1224 all package subdirectories recursively for all *.py and enter
1225 the modules into the archive. If pathname is a plain
1226 directory, listdir *.py and enter all modules. Else, pathname
1227 must be a Python *.py file and the module will be put into the
1228 archive. Added modules are always module.pyo or module.pyc.
1229 This method will compile the module.py into module.pyc if
1230 necessary.
1231 """
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001232 dir, name = os.path.split(pathname)
1233 if os.path.isdir(pathname):
1234 initname = os.path.join(pathname, "__init__.py")
1235 if os.path.isfile(initname):
1236 # This is a package directory, add it
1237 if basename:
1238 basename = "%s/%s" % (basename, name)
1239 else:
1240 basename = name
1241 if self.debug:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001242 print("Adding package in", pathname, "as", basename)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001243 fname, arcname = self._get_codename(initname[0:-3], basename)
1244 if self.debug:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001245 print("Adding", arcname)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001246 self.write(fname, arcname)
1247 dirlist = os.listdir(pathname)
1248 dirlist.remove("__init__.py")
1249 # Add all *.py files and package subdirectories
1250 for filename in dirlist:
1251 path = os.path.join(pathname, filename)
1252 root, ext = os.path.splitext(filename)
1253 if os.path.isdir(path):
1254 if os.path.isfile(os.path.join(path, "__init__.py")):
1255 # This is a package directory, add it
1256 self.writepy(path, basename) # Recursive call
1257 elif ext == ".py":
1258 fname, arcname = self._get_codename(path[0:-3],
1259 basename)
1260 if self.debug:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001261 print("Adding", arcname)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001262 self.write(fname, arcname)
1263 else:
1264 # This is NOT a package directory, add its files at top level
1265 if self.debug:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001266 print("Adding files from directory", pathname)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001267 for filename in os.listdir(pathname):
1268 path = os.path.join(pathname, filename)
1269 root, ext = os.path.splitext(filename)
1270 if ext == ".py":
1271 fname, arcname = self._get_codename(path[0:-3],
1272 basename)
1273 if self.debug:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001274 print("Adding", arcname)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001275 self.write(fname, arcname)
1276 else:
1277 if pathname[-3:] != ".py":
Collin Winterce36ad82007-08-30 01:19:48 +00001278 raise RuntimeError(
1279 'Files added with writepy() must end with ".py"')
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001280 fname, arcname = self._get_codename(pathname[0:-3], basename)
1281 if self.debug:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001282 print("Adding file", arcname)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001283 self.write(fname, arcname)
1284
1285 def _get_codename(self, pathname, basename):
1286 """Return (filename, archivename) for the path.
1287
Fred Drake484d7352000-10-02 21:14:52 +00001288 Given a module name path, return the correct file path and
1289 archive name, compiling if necessary. For example, given
1290 /python/lib/string, return (/python/lib/string.pyc, string).
1291 """
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001292 file_py = pathname + ".py"
1293 file_pyc = pathname + ".pyc"
1294 file_pyo = pathname + ".pyo"
1295 if os.path.isfile(file_pyo) and \
Raymond Hettinger32200ae2002-06-01 19:51:15 +00001296 os.stat(file_pyo).st_mtime >= os.stat(file_py).st_mtime:
Tim Peterse1190062001-01-15 03:34:38 +00001297 fname = file_pyo # Use .pyo file
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001298 elif not os.path.isfile(file_pyc) or \
Raymond Hettinger32200ae2002-06-01 19:51:15 +00001299 os.stat(file_pyc).st_mtime < os.stat(file_py).st_mtime:
Fred Drake484d7352000-10-02 21:14:52 +00001300 import py_compile
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001301 if self.debug:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001302 print("Compiling", file_py)
Martin v. Löwis0c6774d2003-01-15 11:51:06 +00001303 try:
1304 py_compile.compile(file_py, file_pyc, None, True)
Guido van Rossumb940e112007-01-10 16:19:56 +00001305 except py_compile.PyCompileError as err:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001306 print(err.msg)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001307 fname = file_pyc
1308 else:
1309 fname = file_pyc
1310 archivename = os.path.split(fname)[1]
1311 if basename:
1312 archivename = "%s/%s" % (basename, archivename)
1313 return (fname, archivename)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001314
1315
1316def main(args = None):
1317 import textwrap
1318 USAGE=textwrap.dedent("""\
1319 Usage:
1320 zipfile.py -l zipfile.zip # Show listing of a zipfile
1321 zipfile.py -t zipfile.zip # Test if a zipfile is valid
1322 zipfile.py -e zipfile.zip target # Extract zipfile into target dir
1323 zipfile.py -c zipfile.zip src ... # Create zipfile from sources
1324 """)
1325 if args is None:
1326 args = sys.argv[1:]
1327
1328 if not args or args[0] not in ('-l', '-c', '-e', '-t'):
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001329 print(USAGE)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001330 sys.exit(1)
1331
1332 if args[0] == '-l':
1333 if len(args) != 2:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001334 print(USAGE)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001335 sys.exit(1)
1336 zf = ZipFile(args[1], 'r')
1337 zf.printdir()
1338 zf.close()
1339
1340 elif args[0] == '-t':
1341 if len(args) != 2:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001342 print(USAGE)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001343 sys.exit(1)
1344 zf = ZipFile(args[1], 'r')
1345 zf.testzip()
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001346 print("Done testing")
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001347
1348 elif args[0] == '-e':
1349 if len(args) != 3:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001350 print(USAGE)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001351 sys.exit(1)
1352
1353 zf = ZipFile(args[1], 'r')
1354 out = args[2]
1355 for path in zf.namelist():
1356 if path.startswith('./'):
1357 tgt = os.path.join(out, path[2:])
1358 else:
1359 tgt = os.path.join(out, path)
1360
1361 tgtdir = os.path.dirname(tgt)
1362 if not os.path.exists(tgtdir):
1363 os.makedirs(tgtdir)
Guido van Rossumd6ca5462007-05-22 01:29:33 +00001364 fp = io.open(tgt, 'wb')
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001365 fp.write(zf.read(path))
1366 fp.close()
1367 zf.close()
1368
1369 elif args[0] == '-c':
1370 if len(args) < 3:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001371 print(USAGE)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001372 sys.exit(1)
1373
1374 def addToZip(zf, path, zippath):
1375 if os.path.isfile(path):
1376 zf.write(path, zippath, ZIP_DEFLATED)
1377 elif os.path.isdir(path):
1378 for nm in os.listdir(path):
1379 addToZip(zf,
1380 os.path.join(path, nm), os.path.join(zippath, nm))
1381 # else: ignore
1382
1383 zf = ZipFile(args[1], 'w', allowZip64=True)
1384 for src in args[2:]:
1385 addToZip(zf, src, os.path.basename(src))
1386
1387 zf.close()
1388
1389if __name__ == "__main__":
1390 main()