blob: a382383e1773d5e97a8fa83f6a3e8e0095718937 [file] [log] [blame]
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001"""
2Read and write ZIP files.
Guido van Rossumd6ca5462007-05-22 01:29:33 +00003
4XXX references to utf-8 need further investigation.
Thomas Wouters0e3f5912006-08-11 14:57:12 +00005"""
Christian Heimes790c8232008-01-07 21:14:23 +00006import struct, os, time, sys, shutil
Martin v. Löwis59e47792009-01-24 14:10:07 +00007import binascii, io, stat
Guido van Rossum32abe6f2000-03-31 17:30:02 +00008
9try:
Tim Peterse1190062001-01-15 03:34:38 +000010 import zlib # We may need its compression method
Christian Heimesd5e2b6f2008-03-19 21:50:51 +000011 crc32 = zlib.crc32
Guido van Rossum9c673f32001-04-10 15:37:12 +000012except ImportError:
Guido van Rossum32abe6f2000-03-31 17:30:02 +000013 zlib = None
Christian Heimesd5e2b6f2008-03-19 21:50:51 +000014 crc32 = binascii.crc32
Guido van Rossum32abe6f2000-03-31 17:30:02 +000015
Skip Montanaro40fc1602001-03-01 04:27:19 +000016__all__ = ["BadZipfile", "error", "ZIP_STORED", "ZIP_DEFLATED", "is_zipfile",
Thomas Wouters0e3f5912006-08-11 14:57:12 +000017 "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile" ]
Skip Montanaro40fc1602001-03-01 04:27:19 +000018
class BadZipfile(Exception):
    """The exception raised by this module for invalid or unsupported ZIP data."""
Thomas Wouters0e3f5912006-08-11 14:57:12 +000021
22
class LargeZipFile(Exception):
    """Raised when writing a zipfile that requires ZIP64 extensions
    while those extensions are disabled.
    """
28
error = BadZipfile      # Alias of BadZipfile: the exception raised by this module
Guido van Rossum32abe6f2000-03-31 17:30:02 +000030
# Size limits used by this module.  ZipInfo.FileHeader() switches to the
# ZIP64 extension once a file or compressed size exceeds ZIP64_LIMIT.
ZIP64_LIMIT = 0x7FFFFFFF
ZIP_FILECOUNT_LIMIT = 0x10000
ZIP_MAX_COMMENT = 0xFFFF

# constants for Zip file compression methods
ZIP_STORED, ZIP_DEFLATED = 0, 8
# Other ZIP compression methods not supported
38# Other ZIP compression methods not supported
39
Martin v. Löwisb09b8442008-07-03 14:13:42 +000040# Below are some formats and associated data for reading/writing headers using
41# the struct module. The names and structures of headers/records are those used
42# in the PKWARE description of the ZIP file format:
43# http://www.pkware.com/documents/casestudies/APPNOTE.TXT
44# (URL valid as of January 2008)
Thomas Wouters0e3f5912006-08-11 14:57:12 +000045
# "End of central directory" record: struct format, signature and byte size
# (section V.I in the format document)
structEndArchive = b"<4s4H2LH"
stringEndArchive = b"PK\005\006"
sizeEndCentDir = struct.calcsize(structEndArchive)

# Positions of the fields in the unpacked "end of central directory" list.
# The last two (_ECD_COMMENT, _ECD_LOCATION) are not part of the structure
# as defined in the spec; this module appends them as a convenience.
(
    _ECD_SIGNATURE,
    _ECD_DISK_NUMBER,
    _ECD_DISK_START,
    _ECD_ENTRIES_THIS_DISK,
    _ECD_ENTRIES_TOTAL,
    _ECD_SIZE,
    _ECD_OFFSET,
    _ECD_COMMENT_SIZE,
    _ECD_COMMENT,
    _ECD_LOCATION,
) = range(10)

# "Central directory" entry: struct format, signature and byte size
# (section V.F in the format document)
structCentralDir = "<4s4B4HL2L5H2L"
stringCentralDir = b"PK\001\002"
sizeCentralDir = struct.calcsize(structCentralDir)

# Positions of the fields in an unpacked central directory entry
(
    _CD_SIGNATURE,
    _CD_CREATE_VERSION,
    _CD_CREATE_SYSTEM,
    _CD_EXTRACT_VERSION,
    _CD_EXTRACT_SYSTEM,
    _CD_FLAG_BITS,
    _CD_COMPRESS_TYPE,
    _CD_TIME,
    _CD_DATE,
    _CD_CRC,
    _CD_COMPRESSED_SIZE,
    _CD_UNCOMPRESSED_SIZE,
    _CD_FILENAME_LENGTH,
    _CD_EXTRA_FIELD_LENGTH,
    _CD_COMMENT_LENGTH,
    _CD_DISK_NUMBER_START,
    _CD_INTERNAL_FILE_ATTRIBUTES,
    _CD_EXTERNAL_FILE_ATTRIBUTES,
    _CD_LOCAL_HEADER_OFFSET,
) = range(19)

# "Local file header": struct format, signature and byte size
# (section V.A in the format document)
structFileHeader = "<4s2B4HL2L2H"
stringFileHeader = b"PK\003\004"
sizeFileHeader = struct.calcsize(structFileHeader)

# Positions of the fields in an unpacked local file header
(
    _FH_SIGNATURE,
    _FH_EXTRACT_VERSION,
    _FH_EXTRACT_SYSTEM,
    _FH_GENERAL_PURPOSE_FLAG_BITS,
    _FH_COMPRESSION_METHOD,
    _FH_LAST_MOD_TIME,
    _FH_LAST_MOD_DATE,
    _FH_CRC,
    _FH_COMPRESSED_SIZE,
    _FH_UNCOMPRESSED_SIZE,
    _FH_FILENAME_LENGTH,
    _FH_EXTRA_FIELD_LENGTH,
) = range(12)

# "Zip64 end of central directory locator": struct format, signature, size
structEndArchive64Locator = "<4sLQL"
stringEndArchive64Locator = b"PK\x06\x07"
sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator)

# "Zip64 end of central directory" record: struct format, signature, size
# (section V.G in the format document)
structEndArchive64 = "<4sQ2H2L4Q"
stringEndArchive64 = b"PK\x06\x06"
sizeEndCentDir64 = struct.calcsize(structEndArchive64)

# Positions of the fields in an unpacked ZIP64 end-of-central-directory record
(
    _CD64_SIGNATURE,
    _CD64_DIRECTORY_RECSIZE,
    _CD64_CREATE_VERSION,
    _CD64_EXTRACT_VERSION,
    _CD64_DISK_NUMBER,
    _CD64_DISK_NUMBER_START,
    _CD64_NUMBER_ENTRIES_THIS_DISK,
    _CD64_NUMBER_ENTRIES_TOTAL,
    _CD64_DIRECTORY_SIZE,
    _CD64_OFFSET_START_CENTDIR,
) = range(10)
132
def _check_zipfile(fp):
    """Return True if an "end of central directory" record is found in fp.

    fp is passed straight to _EndRecData().  An IOError raised while
    probing the stream is treated as "not a ZIP file".
    """
    try:
        found = bool(_EndRecData(fp))   # file has correct magic number
    except IOError:
        found = False
    return found
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000140
def is_zipfile(filename):
    """Quickly see if a file is a ZIP file by checking the magic number.

    The filename argument may be a path, or a file or file-like object
    (anything with a "read" attribute).  Returns False when the file cannot
    be opened or read.
    """
    verdict = False
    try:
        if not hasattr(filename, "read"):
            # A path: open it ourselves and close it when done
            with open(filename, "rb") as stream:
                verdict = _check_zipfile(stream)
        else:
            # Already a file-like object: probe it directly
            verdict = _check_zipfile(fp=filename)
    except IOError:
        pass
    return verdict
156
def _EndRecData64(fpin, offset, endrec):
    """Read the ZIP64 end-of-archive records and use them to update endrec.

    fpin   -- seekable binary file object positioned anywhere.
    offset -- position of the classic "end of central directory" record,
              expressed relative to the end of the file (whence=2).
    endrec -- list produced by _EndRecData(); updated in place when a ZIP64
              record is found.

    Returns endrec, unchanged if no complete ZIP64 locator/record precedes
    the classic record.  Raises BadZipfile for multi-disk archives.
    """
    try:
        fpin.seek(offset - sizeEndCentDir64Locator, 2)
    except IOError:
        # If the seek fails, the file is not large enough to contain a ZIP64
        # end-of-archive record, so just return the end record we were given.
        return endrec

    data = fpin.read(sizeEndCentDir64Locator)
    if len(data) != sizeEndCentDir64Locator:
        # Short read: there is no room for a locator, so no ZIP64 data.
        return endrec
    sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, data)
    if sig != stringEndArchive64Locator:
        return endrec

    if diskno != 0 or disks != 1:
        raise BadZipfile("zipfiles that span multiple disks are not supported")

    # Assume no 'zip64 extensible data'
    fpin.seek(offset - sizeEndCentDir64Locator - sizeEndCentDir64, 2)
    data = fpin.read(sizeEndCentDir64)
    if len(data) != sizeEndCentDir64:
        # Truncated ZIP64 record: keep the classic record instead of
        # letting struct.unpack fail on short data.
        return endrec
    sig, sz, create_version, read_version, disk_num, disk_dir, \
            dircount, dircount2, dirsize, diroffset = \
            struct.unpack(structEndArchive64, data)
    if sig != stringEndArchive64:
        return endrec

    # Update the original endrec using data from the ZIP64 record
    endrec[_ECD_SIGNATURE] = sig
    endrec[_ECD_DISK_NUMBER] = disk_num
    endrec[_ECD_DISK_START] = disk_dir
    endrec[_ECD_ENTRIES_THIS_DISK] = dircount
    endrec[_ECD_ENTRIES_TOTAL] = dircount2
    endrec[_ECD_SIZE] = dirsize
    endrec[_ECD_OFFSET] = diroffset
    return endrec
194
195
def _EndRecData(fpin):
    """Return data from the "End of Central Directory" record, or None.

    The data is a list of the nine items in the ZIP "End of central dir"
    record followed by a tenth item, the file seek offset of this record.

    None is returned when no complete, self-consistent record is found
    (including when the data after a candidate signature is too short to
    hold a full record)."""

    # Determine file size
    fpin.seek(0, 2)
    filesize = fpin.tell()

    # Check to see if this is ZIP file with no archive comment (the
    # "end of central directory" structure should be the last item in the
    # file if this is the case).
    try:
        fpin.seek(-sizeEndCentDir, 2)
    except IOError:
        return None
    data = fpin.read()
    # Some file-like objects do not fail the seek above on short input, so
    # verify that a whole record was actually read before unpacking it.
    if (len(data) == sizeEndCentDir and
            data[0:4] == stringEndArchive and
            data[-2:] == b"\000\000"):
        # the signature is correct and there's no comment, unpack structure
        endrec = list(struct.unpack(structEndArchive, data))

        # Append a blank comment and record start offset
        endrec.append(b"")
        endrec.append(filesize - sizeEndCentDir)

        # Try to read the "Zip64 end of central directory" structure
        return _EndRecData64(fpin, -sizeEndCentDir, endrec)

    # Either this is not a ZIP file, or it is a ZIP file with an archive
    # comment.  Search the end of the file for the "end of central directory"
    # record signature. The comment is the last item in the ZIP file and may be
    # up to 64K long.  It is assumed that the "end of central directory" magic
    # number does not appear in the comment.
    maxCommentStart = max(filesize - (1 << 16) - sizeEndCentDir, 0)
    fpin.seek(maxCommentStart, 0)
    data = fpin.read()
    start = data.rfind(stringEndArchive)
    if start >= 0:
        # found the magic number; attempt to unpack and interpret
        recData = data[start:start+sizeEndCentDir]
        if len(recData) != sizeEndCentDir:
            # Signature sits too close to the end of the file to be followed
            # by a full record: not a usable end-of-archive record.
            return None
        endrec = list(struct.unpack(structEndArchive, recData))
        comment = data[start+sizeEndCentDir:]
        # check that comment length is correct
        if endrec[_ECD_COMMENT_SIZE] == len(comment):
            # Append the archive comment and start offset
            endrec.append(comment)
            endrec.append(maxCommentStart + start)

            # Try to read the "Zip64 end of central directory" structure
            return _EndRecData64(fpin, maxCommentStart + start - filesize,
                                 endrec)

    # Unable to find a valid end of central directory structure
    return
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000252
Fred Drake484d7352000-10-02 21:14:52 +0000253
class ZipInfo (object):
    """Class with attributes describing each file in the ZIP archive."""

    __slots__ = (
            'orig_filename',
            'filename',
            'date_time',
            'compress_type',
            'comment',
            'extra',
            'create_system',
            'create_version',
            'extract_version',
            'reserved',
            'flag_bits',
            'volume',
            'internal_attr',
            'external_attr',
            'header_offset',
            'CRC',
            'compress_size',
            'file_size',
            '_raw_time',
        )

    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
        """Create an entry for filename, last modified at date_time.

        filename  -- member name; truncated at the first null character and
                     normalized to use "/" as the directory separator.
        date_time -- (year, month, day, hour, min, sec) tuple.
        """
        self.orig_filename = filename   # Original file name in archive

        # Terminate the file name at the first null byte.  Null bytes in file
        # names are used as tricks by viruses in archives.
        null_byte = filename.find(chr(0))
        if null_byte >= 0:
            filename = filename[0:null_byte]
        # This is used to ensure paths in generated ZIP files always use
        # forward slashes as the directory separator, as required by the
        # ZIP format specification.
        if os.sep != "/" and os.sep in filename:
            filename = filename.replace(os.sep, "/")

        self.filename = filename        # Normalized file name
        self.date_time = date_time      # year, month, day, hour, min, sec
        # Standard values:
        self.compress_type = ZIP_STORED # Type of compression for the file
        self.comment = b""              # Comment for each file
        self.extra = b""                # ZIP extra data
        if sys.platform == 'win32':
            self.create_system = 0          # System which created ZIP archive
        else:
            # Assume everything else is unix-y
            self.create_system = 3          # System which created ZIP archive
        self.create_version = 20        # Version which created ZIP archive
        self.extract_version = 20       # Version needed to extract archive
        self.reserved = 0               # Must be zero
        self.flag_bits = 0              # ZIP flag bits
        self.volume = 0                 # Volume number of file header
        self.internal_attr = 0          # Internal attributes
        self.external_attr = 0          # External file attributes
        # Other attributes are set by class ZipFile:
        # header_offset         Byte offset to the file header
        # CRC                   CRC-32 of the uncompressed file
        # compress_size         Size of the compressed file
        # file_size             Size of the uncompressed file

    def FileHeader(self):
        """Return the per-file (local) header as bytes.

        The result is the packed fixed-size header followed by the encoded
        file name and the extra field.  When either size exceeds ZIP64_LIMIT
        a ZIP64 extra record is appended and extract_version/create_version
        are raised to at least 45 on this object.
        """
        dt = self.date_time
        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
        if self.flag_bits & 0x08:
            # Set these to zero because we write them after the file data
            CRC = compress_size = file_size = 0
        else:
            CRC = self.CRC
            compress_size = self.compress_size
            file_size = self.file_size

        extra = self.extra

        if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
            # File is larger than what fits into a 4 byte integer,
            # fall back to the ZIP64 extension
            fmt = '<HHQQ'
            extra = extra + struct.pack(fmt,
                    1, struct.calcsize(fmt)-4, file_size, compress_size)
            file_size = 0xffffffff
            compress_size = 0xffffffff
            self.extract_version = max(45, self.extract_version)
            # Raise create_version from its own current value; deriving it
            # from extract_version would discard a higher create_version.
            self.create_version = max(45, self.create_version)

        filename, flag_bits = self._encodeFilenameFlags()
        header = struct.pack(structFileHeader, stringFileHeader,
                 self.extract_version, self.reserved, flag_bits,
                 self.compress_type, dostime, dosdate, CRC,
                 compress_size, file_size,
                 len(filename), len(extra))
        return header + filename + extra

    def _encodeFilenameFlags(self):
        """Return (encoded filename, flag bits) for writing a header.

        ASCII is used when the name allows it; otherwise the name is encoded
        as UTF-8 and bit 0x800 is set in the returned flags.
        """
        try:
            return self.filename.encode('ascii'), self.flag_bits
        except UnicodeEncodeError:
            return self.filename.encode('utf-8'), self.flag_bits | 0x800

    def _decodeExtra(self):
        """Apply the ZIP64 record (type 1) of the extra field, if any.

        file_size, compress_size and header_offset are replaced by the 64-bit
        values from the record when they hold the 0xffffffff placeholder.
        Raises RuntimeError when the ZIP64 record has an unexpected length.
        """
        # Try to decode the extra field.
        extra = self.extra
        unpack = struct.unpack
        # Each record starts with a 4-byte (type, length) header; fewer than
        # 4 trailing bytes cannot form a record and are ignored.
        while len(extra) >= 4:
            tp, ln = unpack('<HH', extra[:4])
            if tp == 1:
                if ln >= 24:
                    counts = unpack('<QQQ', extra[4:28])
                elif ln == 16:
                    counts = unpack('<QQ', extra[4:20])
                elif ln == 8:
                    counts = unpack('<Q', extra[4:12])
                elif ln == 0:
                    counts = ()
                else:
                    raise RuntimeError("Corrupt extra field %s"%(ln,))

                idx = 0

                # ZIP64 extension (large files and/or large archives)
                if self.file_size in (0xffffffffffffffff, 0xffffffff):
                    self.file_size = counts[idx]
                    idx += 1

                if self.compress_size == 0xFFFFFFFF:
                    self.compress_size = counts[idx]
                    idx += 1

                if self.header_offset == 0xffffffff:
                    self.header_offset = counts[idx]
                    idx+=1

            extra = extra[ln+4:]
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000392
393
Thomas Wouterscf297e42007-02-23 15:07:44 +0000394class _ZipDecrypter:
395 """Class to handle decryption of files stored within a ZIP archive.
396
397 ZIP supports a password-based form of encryption. Even though known
398 plaintext attacks have been found against it, it is still useful
Christian Heimesfdab48e2008-01-20 09:06:41 +0000399 to be able to get data out of such a file.
Thomas Wouterscf297e42007-02-23 15:07:44 +0000400
401 Usage:
402 zd = _ZipDecrypter(mypwd)
403 plain_char = zd(cypher_char)
404 plain_text = map(zd, cypher_text)
405 """
406
407 def _GenerateCRCTable():
408 """Generate a CRC-32 table.
409
410 ZIP encryption uses the CRC32 one-byte primitive for scrambling some
411 internal keys. We noticed that a direct implementation is faster than
412 relying on binascii.crc32().
413 """
414 poly = 0xedb88320
415 table = [0] * 256
416 for i in range(256):
417 crc = i
418 for j in range(8):
419 if crc & 1:
420 crc = ((crc >> 1) & 0x7FFFFFFF) ^ poly
421 else:
422 crc = ((crc >> 1) & 0x7FFFFFFF)
423 table[i] = crc
424 return table
425 crctable = _GenerateCRCTable()
426
427 def _crc32(self, ch, crc):
428 """Compute the CRC32 primitive on one byte."""
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000429 return ((crc >> 8) & 0xffffff) ^ self.crctable[(crc ^ ch) & 0xff]
Thomas Wouterscf297e42007-02-23 15:07:44 +0000430
431 def __init__(self, pwd):
432 self.key0 = 305419896
433 self.key1 = 591751049
434 self.key2 = 878082192
435 for p in pwd:
436 self._UpdateKeys(p)
437
438 def _UpdateKeys(self, c):
439 self.key0 = self._crc32(c, self.key0)
440 self.key1 = (self.key1 + (self.key0 & 255)) & 4294967295
441 self.key1 = (self.key1 * 134775813 + 1) & 4294967295
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000442 self.key2 = self._crc32((self.key1 >> 24) & 255, self.key2)
Thomas Wouterscf297e42007-02-23 15:07:44 +0000443
444 def __call__(self, c):
445 """Decrypt a single character."""
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000446 assert isinstance(c, int)
Thomas Wouterscf297e42007-02-23 15:07:44 +0000447 k = self.key2 | 2
448 c = c ^ (((k * (k^1)) >> 8) & 255)
Thomas Wouterscf297e42007-02-23 15:07:44 +0000449 self._UpdateKeys(c)
450 return c
451
class ZipExtFile:
    """File-like object for reading an archive member.
       Is returned by ZipFile.open().
    """

    def __init__(self, fileobj, zipinfo, decrypt=None):
        """Wrap fileobj, positioned at the start of the member's data.

        fileobj -- binary file object the raw member bytes are read from.
        zipinfo -- ZipInfo describing the member (compress_type,
                   compress_size, filename and, when present, CRC are used).
        decrypt -- optional callable applied to every raw byte value.
        """
        self.fileobj = fileobj
        self.decrypter = decrypt
        self.bytes_read = 0         # raw bytes consumed from fileobj so far
        self.rawbuffer = b''        # raw bytes not yet decrypted/decompressed
        self.readbuffer = b''       # decoded bytes not yet handed to read()
        self.linebuffer = b''       # decoded bytes held back by readline()
        self.eof = False
        self.univ_newlines = False
        self.nlSeps = (b"\n", )
        self.lastdiscard = b''      # last line separator stripped by readline()

        self.compress_type = zipinfo.compress_type
        self.compress_size = zipinfo.compress_size

        self.closed = False
        self.mode = "r"
        self.name = zipinfo.filename

        # read from compressed files in 64k blocks
        self.compreadsize = 64*1024
        if self.compress_type == ZIP_DEFLATED:
            self.dc = zlib.decompressobj(-15)

        if hasattr(zipinfo, 'CRC'):
            self._expected_crc = zipinfo.CRC
            self._running_crc = crc32(b'') & 0xffffffff
        else:
            self._expected_crc = None

    def set_univ_newlines(self, univ_newlines):
        """Enable or disable recognition of "\\r\\n" and "\\r" line endings."""
        self.univ_newlines = univ_newlines

        # pick line separator char(s) based on universal newlines flag
        self.nlSeps = (b"\n", )
        if self.univ_newlines:
            self.nlSeps = (b"\r\n", b"\r", b"\n")

    def __iter__(self):
        return self

    def __next__(self):
        nextline = self.readline()
        if not nextline:
            raise StopIteration()

        return nextline

    def close(self):
        """Mark this object as closed; the underlying file is not touched."""
        self.closed = True

    def _checkfornewline(self):
        """Return (index, separator length) of the first line separator found
        in linebuffer, trying nlSeps in order, or (-1, -1) if there is none.
        """
        nl, nllen = -1, -1
        if self.linebuffer:
            # ugly check for cases where half of an \r\n pair was
            # read on the last pass, and the \r was discarded.  In this
            # case we just throw away the \n at the start of the buffer.
            if (self.lastdiscard, self.linebuffer[:1]) == (b'\r', b'\n'):
                self.linebuffer = self.linebuffer[1:]

            for sep in self.nlSeps:
                nl = self.linebuffer.find(sep)
                if nl >= 0:
                    nllen = len(sep)
                    return nl, nllen

        return nl, nllen

    def readline(self, size = -1):
        """Read a line with approx. size. If size is negative,
           read a whole line.
        """
        if size < 0:
            size = sys.maxsize
        elif size == 0:
            return b''

        # check for a newline already in buffer
        nl, nllen = self._checkfornewline()

        if nl >= 0:
            # the next line was already in the buffer
            nl = min(nl, size)
        else:
            # no line break in buffer - try to read more
            size -= len(self.linebuffer)
            while nl < 0 and size > 0:
                buf = self.read(min(size, 100))
                if not buf:
                    break
                self.linebuffer += buf
                size -= len(buf)

                # check for a newline in buffer
                nl, nllen = self._checkfornewline()

            # we either ran out of bytes in the file, or
            # met the specified size limit without finding a newline,
            # so return current buffer
            if nl < 0:
                s = self.linebuffer
                self.linebuffer = b''
                return s

        buf = self.linebuffer[:nl]
        self.lastdiscard = self.linebuffer[nl:nl + nllen]
        self.linebuffer = self.linebuffer[nl + nllen:]

        # line is always returned with \n as newline char (except possibly
        # for a final incomplete line in the file, which is handled above).
        return buf + b"\n"

    def readlines(self, sizehint = -1):
        """Return a list with all (following) lines. The sizehint parameter
        is ignored in this implementation.
        """
        result = []
        while True:
            line = self.readline()
            if not line: break
            result.append(line)
        return result

    def _update_crc(self, newdata, eof):
        """Fold newdata into the running CRC-32; when eof is true, raise
        BadZipfile if the result differs from the expected CRC.
        """
        # Update the CRC using the given data.
        if self._expected_crc is None:
            # No need to compute the CRC if we don't have a reference value
            return
        self._running_crc = crc32(newdata, self._running_crc) & 0xffffffff
        # Check the CRC if we're at the end of the file
        if eof and self._running_crc != self._expected_crc:
            raise BadZipfile("Bad CRC-32 for file %r" % self.name)

    def read(self, size = None):
        """Read and return up to size decoded bytes of the member.

        With size None or negative, everything that remains is returned.
        With size 0, b'' is returned.  Raises BadZipfile (via _update_crc)
        when the end of the member is reached and the CRC does not match.
        """
        # act like file obj and return empty string if size is 0
        if size == 0:
            return b''

        # A negative size means "no limit", as for ordinary file objects.
        # Without this, the final slice below would be readbuffer[:size]
        # and silently drop trailing bytes.
        if size is not None and size < 0:
            size = None

        # determine read size
        bytesToRead = self.compress_size - self.bytes_read

        # adjust read size for encrypted files since the first 12 bytes
        # are for the encryption/password information
        if self.decrypter is not None:
            bytesToRead -= 12

        if size is not None:
            if self.compress_type == ZIP_STORED:
                lr = len(self.readbuffer)
                bytesToRead = min(bytesToRead, size - lr)
            elif self.compress_type == ZIP_DEFLATED:
                if len(self.readbuffer) > size:
                    # the user has requested fewer bytes than we've already
                    # pulled through the decompressor; don't read any more
                    bytesToRead = 0
                else:
                    # user will use up the buffer, so read some more
                    lr = len(self.rawbuffer)
                    bytesToRead = min(bytesToRead, self.compreadsize - lr)

        # avoid reading past end of file contents
        if bytesToRead + self.bytes_read > self.compress_size:
            bytesToRead = self.compress_size - self.bytes_read

        # try to read from file (if necessary)
        if bytesToRead > 0:
            data = self.fileobj.read(bytesToRead)
            self.bytes_read += len(data)
            self.rawbuffer += data

            # handle contents of raw buffer
            if self.rawbuffer:
                newdata = self.rawbuffer
                self.rawbuffer = b''

                # decrypt new data if we were given an object to handle that
                if newdata and self.decrypter is not None:
                    newdata = bytes(map(self.decrypter, newdata))

                # decompress newly read data if necessary
                if newdata and self.compress_type == ZIP_DEFLATED:
                    newdata = self.dc.decompress(newdata)
                    self.rawbuffer = self.dc.unconsumed_tail
                    # NOTE(review): self.eof is not set to True anywhere in
                    # this class, so this flush only runs if something
                    # outside the class sets it -- confirm.
                    if self.eof and len(self.rawbuffer) == 0:
                        # we're out of raw bytes (both from the file and
                        # the local buffer); flush just to make sure the
                        # decompressor is done
                        newdata += self.dc.flush()
                        # prevent decompressor from being used again
                        self.dc = None

                self._update_crc(newdata, eof=(
                    self.compress_size == self.bytes_read and
                    len(self.rawbuffer) == 0))
                self.readbuffer += newdata

        # return what the user asked for
        if size is None or len(self.readbuffer) <= size:
            data = self.readbuffer
            self.readbuffer = b''
        else:
            data = self.readbuffer[:size]
            self.readbuffer = self.readbuffer[size:]

        return data
Guido van Rossumd8faa362007-04-27 19:54:29 +0000667
668
class ZipFile:
    """ Class with methods to open, read, write, close, list zip files.

    z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=False)

    file: Either the path to the file, or a file-like object.
          If it is a path, the file will be opened and closed by ZipFile.
    mode: The mode can be either read "r", write "w" or append "a".
    compression: ZIP_STORED (no compression) or ZIP_DEFLATED (requires zlib).
    allowZip64: if True ZipFile will create files with ZIP64 extensions when
                needed, otherwise it will raise an exception when this would
                be necessary.

    """

    fp = None                   # Set here since __del__ checks it

    def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=False):
        """Open the ZIP file with mode read "r", write "w" or append "a".

        Raises RuntimeError for an unknown mode or compression method, or
        when ZIP_DEFLATED is requested but zlib could not be imported.
        In mode "r" a file that is not a valid archive raises BadZipfile.
        """
        if mode not in ("r", "w", "a"):
            raise RuntimeError('ZipFile() requires mode "r", "w", or "a"')

        if compression == ZIP_STORED:
            pass
        elif compression == ZIP_DEFLATED:
            if not zlib:
                raise RuntimeError(
                      "Compression requires the (missing) zlib module")
        else:
            raise RuntimeError("That compression method is not supported")

        self._allowZip64 = allowZip64
        self._didModify = False
        self.debug = 0  # Level of printing: 0 through 3
        self.NameToInfo = {}    # Find file info given name
        self.filelist = []      # List of ZipInfo instances for archive
        self.compression = compression  # Method of compression
        self.mode = key = mode.replace('b', '')[0]
        self.pwd = None
        self.comment = b''

        # Check if we were passed a file-like object
        if isinstance(file, str):
            # No, it's a filename
            self._filePassed = 0
            self.filename = file
            modeDict = {'r' : 'rb', 'w': 'wb', 'a' : 'r+b'}
            try:
                self.fp = io.open(file, modeDict[mode])
            except IOError:
                if mode == 'a':
                    # Appending to a file that cannot be opened for update:
                    # fall back to creating it.
                    mode = key = 'w'
                    self.fp = io.open(file, modeDict[mode])
                else:
                    raise
        else:
            self._filePassed = 1
            self.fp = file
            self.filename = getattr(file, 'name', None)

        if key == 'r':
            self._GetContents()
        elif key == 'w':
            # set the modified flag so central directory gets written
            # even if no files are added to the archive
            self._didModify = True
        elif key == 'a':
            try:
                # See if file is a zip file
                self._RealGetContents()
                # seek to start of directory and overwrite
                self.fp.seek(self.start_dir, 0)
            except BadZipfile:
                # file is not a zip file, just append
                self.fp.seek(0, 2)

                # set the modified flag so central directory gets written
                # even if no files are added to the archive
                self._didModify = True
        else:
            # Defensive only: mode was already validated above.
            if not self._filePassed:
                self.fp.close()
                self.fp = None
            raise RuntimeError('Mode must be "r", "w" or "a"')

    def _GetContents(self):
        """Read the directory, making sure we close the file if the format
        is bad."""
        try:
            self._RealGetContents()
        except BadZipfile:
            if not self._filePassed:
                self.fp.close()
                self.fp = None
            raise

    def _RealGetContents(self):
        """Read in the table of contents for the ZIP file.

        Fills self.filelist and self.NameToInfo, and sets self.comment and
        self.start_dir.  Raises BadZipfile when the end-of-archive record
        cannot be found or a central directory record is malformed.
        """
        fp = self.fp
        try:
            endrec = _EndRecData(fp)
        except IOError:
            raise BadZipfile("File is not a zip file")
        if not endrec:
            raise BadZipfile("File is not a zip file")
        if self.debug > 1:
            print(endrec)
        size_cd = endrec[_ECD_SIZE]             # bytes in central directory
        offset_cd = endrec[_ECD_OFFSET]         # offset of central directory
        self.comment = endrec[_ECD_COMMENT]     # archive comment

        # "concat" is zero, unless zip was concatenated to another file
        concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
        if endrec[_ECD_SIGNATURE] == stringEndArchive64:
            # If Zip64 extension structures are present, account for them
            concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)

        if self.debug > 2:
            inferred = concat + offset_cd
            print("given, inferred, offset", offset_cd, inferred, concat)
        # self.start_dir:  Position of start of central directory
        self.start_dir = offset_cd + concat
        fp.seek(self.start_dir, 0)
        data = fp.read(size_cd)
        # Parse the directory from memory rather than from the archive file.
        fp = io.BytesIO(data)
        total = 0
        while total < size_cd:
            centdir = fp.read(sizeCentralDir)
            if len(centdir) != sizeCentralDir:
                # A short read would otherwise surface as struct.error.
                raise BadZipfile("Truncated central directory")
            if centdir[0:4] != stringCentralDir:
                raise BadZipfile("Bad magic number for central directory")
            centdir = struct.unpack(structCentralDir, centdir)
            if self.debug > 2:
                print(centdir)
            filename = fp.read(centdir[_CD_FILENAME_LENGTH])
            flags = centdir[5]
            if flags & 0x800:
                # UTF-8 file names extension
                filename = filename.decode('utf-8')
            else:
                # Historical ZIP filename encoding
                filename = filename.decode('cp437')
            # Create ZipInfo instance to store file information
            x = ZipInfo(filename)
            x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
            x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
            x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
            (x.create_version, x.create_system, x.extract_version, x.reserved,
                x.flag_bits, x.compress_type, t, d,
                x.CRC, x.compress_size, x.file_size) = centdir[1:12]
            x.volume, x.internal_attr, x.external_attr = centdir[15:18]
            # Convert date/time code to (year, month, day, hour, min, sec)
            x._raw_time = t
            x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
                                     t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )

            x._decodeExtra()
            x.header_offset = x.header_offset + concat
            self.filelist.append(x)
            self.NameToInfo[x.filename] = x

            # update total bytes read from central directory
            total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH]
                     + centdir[_CD_EXTRA_FIELD_LENGTH]
                     + centdir[_CD_COMMENT_LENGTH])

            if self.debug > 2:
                print("total", total)

    def namelist(self):
        """Return a list of file names in the archive."""
        return [data.filename for data in self.filelist]

    def infolist(self):
        """Return a list of class ZipInfo instances for files in the
        archive."""
        return self.filelist

    def printdir(self, file=None):
        """Print a table of contents for the zip file."""
        print("%-46s %19s %12s" % ("File Name", "Modified    ", "Size"),
              file=file)
        for zinfo in self.filelist:
            date = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time[:6]
            print("%-46s %s %12d" % (zinfo.filename, date, zinfo.file_size),
                  file=file)

    def testzip(self):
        """Read all the files and check the CRC.

        Return the name of the first member whose read raises BadZipfile,
        or None when every member reads cleanly.
        """
        chunk_size = 2 ** 20
        for zinfo in self.filelist:
            try:
                # Read by chunks, to avoid an OverflowError or a
                # MemoryError with very large embedded files.
                f = self.open(zinfo.filename, "r")
                while f.read(chunk_size):     # Check CRC-32
                    pass
            except BadZipfile:
                return zinfo.filename

    def getinfo(self, name):
        """Return the instance of ZipInfo given 'name'.

        Raises KeyError if the archive has no member called 'name'.
        """
        info = self.NameToInfo.get(name)
        if info is None:
            raise KeyError(
                'There is no item named %r in the archive' % name)

        return info

    def setpassword(self, pwd):
        """Set default password for encrypted files.

        'pwd' must be a bytes object; a false value clears the password.
        """
        if pwd and not isinstance(pwd, bytes):
            raise TypeError("pwd: expected bytes, got %s" % type(pwd))
        if pwd:
            self.pwd = pwd
        else:
            self.pwd = None

    def read(self, name, pwd=None):
        """Return file bytes (as a string) for name."""
        return self.open(name, "r", pwd).read()

    def open(self, name, mode="r", pwd=None):
        """Return file-like object for 'name'.

        'name' is a member name or a ZipInfo instance, 'mode' is one of
        "r", "U" or "rU", and 'pwd' is an optional bytes password that
        overrides the default set with setpassword().

        Raises RuntimeError for a bad mode, a closed archive, or a missing
        or wrong password; TypeError for a non-bytes password; KeyError for
        an unknown member name; BadZipfile for a damaged local header.
        """
        if mode not in ("r", "U", "rU"):
            raise RuntimeError('open() requires mode "r", "U", or "rU"')
        if pwd and not isinstance(pwd, bytes):
            raise TypeError("pwd: expected bytes, got %s" % type(pwd))
        if not self.fp:
            raise RuntimeError(
                  "Attempt to read ZIP archive that was already closed")

        # Make sure we have an info object.  This is done before any file
        # is opened so that an unknown name cannot leak a file handle.
        if isinstance(name, ZipInfo):
            # 'name' is already an info object
            zinfo = name
        else:
            # Get info object for name
            zinfo = self.getinfo(name)

        # Only open a new file for instances where we were not
        # given a file object in the constructor
        if self._filePassed:
            zef_file = self.fp
        else:
            zef_file = io.open(self.filename, 'rb')

        try:
            zef_file.seek(zinfo.header_offset, 0)

            # Skip the file header:
            fheader = zef_file.read(sizeFileHeader)
            if len(fheader) != sizeFileHeader:
                # A short read would otherwise surface as struct.error.
                raise BadZipfile("Truncated file header")
            if fheader[0:4] != stringFileHeader:
                raise BadZipfile("Bad magic number for file header")

            fheader = struct.unpack(structFileHeader, fheader)
            fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
            if fheader[_FH_EXTRA_FIELD_LENGTH]:
                zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])

            if zinfo.flag_bits & 0x800:
                # UTF-8 filename
                fname_str = fname.decode("utf-8")
            else:
                fname_str = fname.decode("cp437")

            if fname_str != zinfo.orig_filename:
                raise BadZipfile(
                      'File name in directory %r and header %r differ.'
                      % (zinfo.orig_filename, fname))

            # check for encrypted flag & handle password
            is_encrypted = zinfo.flag_bits & 0x1
            zd = None
            if is_encrypted:
                if not pwd:
                    pwd = self.pwd
                if not pwd:
                    raise RuntimeError("File %s is encrypted, "
                                       "password required for extraction" % name)

                zd = _ZipDecrypter(pwd)
                # The first 12 bytes in the cypher stream is an encryption header
                #  used to strengthen the algorithm. The first 11 bytes are
                #  completely random, while the 12th contains the MSB of the CRC,
                #  or the MSB of the file time depending on the header type
                #  and is used to check the correctness of the password.
                header = zef_file.read(12)
                h = list(map(zd, header[0:12]))
                if zinfo.flag_bits & 0x8:
                    # compare against the file type from extended local headers
                    check_byte = (zinfo._raw_time >> 8) & 0xff
                else:
                    # compare against the CRC otherwise
                    check_byte = (zinfo.CRC >> 24) & 0xff
                if h[11] != check_byte:
                    raise RuntimeError("Bad password for file", name)

            # build and return a ZipExtFile
            if zd is None:
                zef = ZipExtFile(zef_file, zinfo)
            else:
                zef = ZipExtFile(zef_file, zinfo, zd)

            # set universal newlines on ZipExtFile if necessary
            if "U" in mode:
                zef.set_univ_newlines(True)
            return zef
        except BaseException:
            # Do not leak the handle we opened ourselves; a file object
            # supplied by the caller stays open.
            if not self._filePassed:
                zef_file.close()
            raise

    def extract(self, member, path=None, pwd=None):
        """Extract a member from the archive to the current working directory,
           using its full name. Its file information is extracted as accurately
           as possible. `member' may be a filename or a ZipInfo object. You can
           specify a different directory using `path'.
        """
        if not isinstance(member, ZipInfo):
            member = self.getinfo(member)

        if path is None:
            path = os.getcwd()

        return self._extract_member(member, path, pwd)

    def extractall(self, path=None, members=None, pwd=None):
        """Extract all members from the archive to the current working
           directory. `path' specifies a different directory to extract to.
           `members' is optional and must be a subset of the list returned
           by namelist().
        """
        if members is None:
            members = self.namelist()

        for zipinfo in members:
            self.extract(zipinfo, path, pwd)

    def _extract_member(self, member, targetpath, pwd):
        """Extract the ZipInfo object 'member' to a physical
           file on the path targetpath.

           Returns the normalized path that was created.  Member names come
           from the archive and are untrusted: absolute names are treated as
           relative, and drive prefixes, empty, "." and ".." components are
           dropped so nothing is written outside 'targetpath'.
        """
        # Strip trailing path separator, unless it represents the root.
        if (targetpath[-1:] in (os.path.sep, os.path.altsep)
            and len(os.path.splitdrive(targetpath)[1]) > 1):
            targetpath = targetpath[:-1]

        # build the destination pathname, replacing
        # forward slashes to platform specific separators.
        arcname = member.filename.replace('/', os.path.sep)
        if os.path.altsep:
            arcname = arcname.replace(os.path.altsep, os.path.sep)
        # interpret an absolute name as relative and remove the drive
        # letter, redundant separators, "." and ".." components.
        arcname = os.path.splitdrive(arcname)[1]
        arcname = os.path.sep.join(
            part for part in arcname.split(os.path.sep)
            if part not in ('', os.path.curdir, os.path.pardir))

        targetpath = os.path.join(targetpath, arcname)
        targetpath = os.path.normpath(targetpath)

        # Create all upper directories if necessary.
        upperdirs = os.path.dirname(targetpath)
        if upperdirs and not os.path.exists(upperdirs):
            os.makedirs(upperdirs)

        if member.filename.endswith('/'):
            # Directory entry: nothing to copy.
            if not os.path.isdir(targetpath):
                os.mkdir(targetpath)
            return targetpath

        source = self.open(member, pwd=pwd)
        try:
            with open(targetpath, "wb") as target:
                shutil.copyfileobj(source, target)
        finally:
            # Close the member stream even when the copy fails.
            source.close()

        return targetpath

    def _writecheck(self, zinfo):
        """Check for errors before writing a file to the archive.

        Raises RuntimeError for a read-only or closed archive or an
        unusable compression method, and LargeZipFile when the entry
        needs ZIP64 extensions that were not enabled.
        """
        if zinfo.filename in self.NameToInfo:
            if self.debug:      # Warning for duplicate names
                print("Duplicate name:", zinfo.filename)
        if self.mode not in ("w", "a"):
            raise RuntimeError('write() requires mode "w" or "a"')
        if not self.fp:
            raise RuntimeError(
                  "Attempt to write ZIP archive that was already closed")
        if zinfo.compress_type == ZIP_DEFLATED and not zlib:
            raise RuntimeError(
                  "Compression requires the (missing) zlib module")
        if zinfo.compress_type not in (ZIP_STORED, ZIP_DEFLATED):
            raise RuntimeError("That compression method is not supported")
        if zinfo.file_size > ZIP64_LIMIT:
            if not self._allowZip64:
                raise LargeZipFile("Filesize would require ZIP64 extensions")
        if zinfo.header_offset > ZIP64_LIMIT:
            if not self._allowZip64:
                raise LargeZipFile(
                      "Zipfile size would require ZIP64 extensions")

    def write(self, filename, arcname=None, compress_type=None):
        """Put the bytes from filename into the archive under the name
        arcname.

        'arcname' defaults to 'filename' with any drive and leading path
        separators removed.  'compress_type' overrides the archive-wide
        compression method for this entry.  Directories are stored as
        empty entries whose name ends in '/'.
        """
        if not self.fp:
            raise RuntimeError(
                  "Attempt to write to ZIP archive that was already closed")

        st = os.stat(filename)
        isdir = stat.S_ISDIR(st.st_mode)
        mtime = time.localtime(st.st_mtime)
        date_time = mtime[0:6]
        # Create ZipInfo instance to store file information
        if arcname is None:
            arcname = filename
        arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
        while arcname[0] in (os.sep, os.altsep):
            arcname = arcname[1:]
        if isdir:
            arcname += '/'
        zinfo = ZipInfo(arcname, date_time)
        zinfo.external_attr = (st[0] & 0xFFFF) << 16      # Unix attributes
        if compress_type is None:
            zinfo.compress_type = self.compression
        else:
            zinfo.compress_type = compress_type

        zinfo.file_size = st.st_size
        zinfo.flag_bits = 0x00
        zinfo.header_offset = self.fp.tell()    # Start of header bytes

        self._writecheck(zinfo)
        self._didModify = True

        if isdir:
            zinfo.file_size = 0
            zinfo.compress_size = 0
            zinfo.CRC = 0
            self.filelist.append(zinfo)
            self.NameToInfo[zinfo.filename] = zinfo
            self.fp.write(zinfo.FileHeader())
            return

        with open(filename, "rb") as fp:
            # Must overwrite CRC and sizes with correct data later
            zinfo.CRC = CRC = 0
            zinfo.compress_size = compress_size = 0
            zinfo.file_size = file_size = 0
            self.fp.write(zinfo.FileHeader())
            if zinfo.compress_type == ZIP_DEFLATED:
                cmpr = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                     zlib.DEFLATED, -15)
            else:
                cmpr = None
            while True:
                buf = fp.read(1024 * 8)
                if not buf:
                    break
                file_size = file_size + len(buf)
                CRC = crc32(buf, CRC) & 0xffffffff
                if cmpr:
                    buf = cmpr.compress(buf)
                    compress_size = compress_size + len(buf)
                self.fp.write(buf)
        if cmpr:
            buf = cmpr.flush()
            compress_size = compress_size + len(buf)
            self.fp.write(buf)
            zinfo.compress_size = compress_size
        else:
            zinfo.compress_size = file_size
        zinfo.CRC = CRC
        zinfo.file_size = file_size
        # Seek backwards and write CRC and file sizes
        position = self.fp.tell()       # Preserve current position in file
        self.fp.seek(zinfo.header_offset + 14, 0)
        self.fp.write(struct.pack("<LLL", zinfo.CRC, zinfo.compress_size,
              zinfo.file_size))
        self.fp.seek(position, 0)
        self.filelist.append(zinfo)
        self.NameToInfo[zinfo.filename] = zinfo

    def writestr(self, zinfo_or_arcname, data):
        """Write a file into the archive.  The contents is 'data', which
        may be either a 'str' or a 'bytes' instance; if it is a 'str',
        it is encoded as UTF-8 first.
        'zinfo_or_arcname' is either a ZipInfo instance or
        the name of the file in the archive."""
        if isinstance(data, str):
            data = data.encode("utf-8")
        if not isinstance(zinfo_or_arcname, ZipInfo):
            zinfo = ZipInfo(filename=zinfo_or_arcname,
                            date_time=time.localtime(time.time())[:6])
            zinfo.compress_type = self.compression
            zinfo.external_attr = 0o600 << 16
        else:
            zinfo = zinfo_or_arcname

        if not self.fp:
            raise RuntimeError(
                  "Attempt to write to ZIP archive that was already closed")

        zinfo.file_size = len(data)            # Uncompressed size
        zinfo.header_offset = self.fp.tell()    # Start of header data
        self._writecheck(zinfo)
        self._didModify = True
        zinfo.CRC = crc32(data) & 0xffffffff       # CRC-32 checksum
        if zinfo.compress_type == ZIP_DEFLATED:
            co = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                 zlib.DEFLATED, -15)
            data = co.compress(data) + co.flush()
            zinfo.compress_size = len(data)    # Compressed size
        else:
            zinfo.compress_size = zinfo.file_size
        self.fp.write(zinfo.FileHeader())
        self.fp.write(data)
        self.fp.flush()
        if zinfo.flag_bits & 0x08:
            # Write CRC and file sizes after the file data
            self.fp.write(struct.pack("<LLL", zinfo.CRC, zinfo.compress_size,
                  zinfo.file_size))
        self.filelist.append(zinfo)
        self.NameToInfo[zinfo.filename] = zinfo

    def __del__(self):
        """Call the "close()" method in case the user forgot."""
        self.close()

    def close(self):
        """Close the file, and for mode "w" and "a" write the ending
        records.

        Calling close() on an already closed archive does nothing.  The
        underlying file is released (closed if ZipFile opened it) even
        when writing the ending records fails.
        """
        if self.fp is None:
            return

        try:
            if self.mode in ("w", "a") and self._didModify: # write ending records
                count = 0
                pos1 = self.fp.tell()
                for zinfo in self.filelist:         # write central directory
                    count = count + 1
                    dt = zinfo.date_time
                    dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
                    dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
                    # Values that overflow the 32-bit header fields are
                    # collected here and emitted as a ZIP64 extra field.
                    extra = []
                    if zinfo.file_size > ZIP64_LIMIT \
                            or zinfo.compress_size > ZIP64_LIMIT:
                        extra.append(zinfo.file_size)
                        extra.append(zinfo.compress_size)
                        file_size = 0xffffffff
                        compress_size = 0xffffffff
                    else:
                        file_size = zinfo.file_size
                        compress_size = zinfo.compress_size

                    if zinfo.header_offset > ZIP64_LIMIT:
                        extra.append(zinfo.header_offset)
                        header_offset = 0xffffffff
                    else:
                        header_offset = zinfo.header_offset

                    extra_data = zinfo.extra
                    if extra:
                        # Append a ZIP64 field to the extra's
                        extra_data = struct.pack(
                                '<HH' + 'Q'*len(extra),
                                1, 8*len(extra), *extra) + extra_data

                        extract_version = max(45, zinfo.extract_version)
                        create_version = max(45, zinfo.create_version)
                    else:
                        extract_version = zinfo.extract_version
                        create_version = zinfo.create_version

                    try:
                        filename, flag_bits = zinfo._encodeFilenameFlags()
                        centdir = struct.pack(structCentralDir,
                            stringCentralDir, create_version,
                            zinfo.create_system, extract_version, zinfo.reserved,
                            flag_bits, zinfo.compress_type, dostime, dosdate,
                            zinfo.CRC, compress_size, file_size,
                            len(filename), len(extra_data), len(zinfo.comment),
                            0, zinfo.internal_attr, zinfo.external_attr,
                            header_offset)
                    except DeprecationWarning:
                        # Dump the values that could not be packed, then
                        # let the warning propagate.
                        print((structCentralDir, stringCentralDir, create_version,
                            zinfo.create_system, extract_version, zinfo.reserved,
                            zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
                            zinfo.CRC, compress_size, file_size,
                            len(zinfo.filename), len(extra_data), len(zinfo.comment),
                            0, zinfo.internal_attr, zinfo.external_attr,
                            header_offset), file=sys.stderr)
                        raise
                    self.fp.write(centdir)
                    self.fp.write(filename)
                    self.fp.write(extra_data)
                    self.fp.write(zinfo.comment)

                pos2 = self.fp.tell()
                # Write end-of-zip-archive record
                centDirCount = count
                centDirSize = pos2 - pos1
                centDirOffset = pos1
                if (centDirCount >= ZIP_FILECOUNT_LIMIT or
                    centDirOffset > ZIP64_LIMIT or
                    centDirSize > ZIP64_LIMIT):
                    # Need to write the ZIP64 end-of-archive records
                    zip64endrec = struct.pack(
                            structEndArchive64, stringEndArchive64,
                            44, 45, 45, 0, 0, centDirCount, centDirCount,
                            centDirSize, centDirOffset)
                    self.fp.write(zip64endrec)

                    zip64locrec = struct.pack(
                            structEndArchive64Locator,
                            stringEndArchive64Locator, 0, pos2, 1)
                    self.fp.write(zip64locrec)
                    # Clamp to what the classic end record can represent.
                    centDirCount = min(centDirCount, 0xFFFF)
                    centDirSize = min(centDirSize, 0xFFFFFFFF)
                    centDirOffset = min(centDirOffset, 0xFFFFFFFF)

                # check for valid comment length
                if len(self.comment) > ZIP_MAX_COMMENT:
                    if self.debug > 0:
                        print('Archive comment is too long; truncating to %d bytes'
                              % ZIP_MAX_COMMENT)
                    self.comment = self.comment[:ZIP_MAX_COMMENT]

                endrec = struct.pack(structEndArchive, stringEndArchive,
                                     0, 0, centDirCount, centDirCount,
                                     centDirSize, centDirOffset, len(self.comment))
                self.fp.write(endrec)
                self.fp.write(self.comment)
                self.fp.flush()
        finally:
            # Always detach from the file so a failed write cannot leave a
            # half-closed archive that __del__ would try to finish again.
            fp = self.fp
            self.fp = None
            if not self._filePassed:
                fp.close()
1302
1303
class PyZipFile(ZipFile):
    """Class to create ZIP archives with Python library files and packages."""

    def writepy(self, pathname, basename=""):
        """Add all files from "pathname" to the ZIP archive.

        If pathname is a package directory, search the directory and
        all package subdirectories recursively for all *.py and enter
        the modules into the archive.  If pathname is a plain
        directory, listdir *.py and enter all modules.  Else, pathname
        must be a Python *.py file and the module will be put into the
        archive.  Added modules are always module.pyo or module.pyc.
        This method will compile the module.py into module.pyc if
        necessary.

        "basename" is the archive directory prefix under which the
        modules are stored; it is empty for the top level.  Trailing path
        separators on "pathname" are ignored when the package name is
        derived, so "pkg" and "pkg/" produce the same archive layout.

        Raises RuntimeError if pathname is a file that does not end
        with ".py".
        """
        # os.path.split("pkg/") yields an empty tail, which would silently
        # drop the package name from every archive path.  Strip trailing
        # separators first; fall back to the raw value for a bare root.
        separators = os.sep + (os.altsep or "")
        name = os.path.basename(pathname.rstrip(separators) or pathname)

        if not os.path.isdir(pathname):
            # A single module file.
            if pathname[-3:] != ".py":
                raise RuntimeError(
                    'Files added with writepy() must end with ".py"')
            fname, arcname = self._get_codename(pathname[0:-3], basename)
            if self.debug:
                print("Adding file", arcname)
            self.write(fname, arcname)
            return

        initname = os.path.join(pathname, "__init__.py")
        if os.path.isfile(initname):
            # This is a package directory, add it
            if basename:
                basename = "%s/%s" % (basename, name)
            else:
                basename = name
            if self.debug:
                print("Adding package in", pathname, "as", basename)
            fname, arcname = self._get_codename(initname[0:-3], basename)
            if self.debug:
                print("Adding", arcname)
            self.write(fname, arcname)
            # Add all *.py files and package subdirectories
            for filename in os.listdir(pathname):
                if filename == "__init__.py":
                    # Already written above.
                    continue
                path = os.path.join(pathname, filename)
                root, ext = os.path.splitext(filename)
                if os.path.isdir(path):
                    if os.path.isfile(os.path.join(path, "__init__.py")):
                        # This is a package directory, add it
                        self.writepy(path, basename)  # Recursive call
                elif ext == ".py":
                    fname, arcname = self._get_codename(path[0:-3],
                                                        basename)
                    if self.debug:
                        print("Adding", arcname)
                    self.write(fname, arcname)
        else:
            # This is NOT a package directory, add its files at top level
            if self.debug:
                print("Adding files from directory", pathname)
            for filename in os.listdir(pathname):
                path = os.path.join(pathname, filename)
                root, ext = os.path.splitext(filename)
                if ext == ".py":
                    fname, arcname = self._get_codename(path[0:-3],
                                                        basename)
                    if self.debug:
                        print("Adding", arcname)
                    self.write(fname, arcname)

    def _get_codename(self, pathname, basename):
        """Return (filename, archivename) for the path.

        Given a module name path, return the correct file path and
        archive name, compiling if necessary.  For example, given
        /python/lib/string, return (/python/lib/string.pyc, string).

        An up-to-date .pyo is preferred; otherwise the .pyc is used and is
        (re)compiled first when it is missing or older than the .py.  A
        compilation error is printed, not raised, and the .pyc path is
        still returned.
        """
        file_py = pathname + ".py"
        file_pyc = pathname + ".pyc"
        file_pyo = pathname + ".pyo"
        if os.path.isfile(file_pyo) and \
                os.stat(file_pyo).st_mtime >= os.stat(file_py).st_mtime:
            fname = file_pyo    # Use .pyo file
        elif not os.path.isfile(file_pyc) or \
                os.stat(file_pyc).st_mtime < os.stat(file_py).st_mtime:
            import py_compile
            if self.debug:
                print("Compiling", file_py)
            try:
                # doraise=True so syntax errors surface as PyCompileError
                py_compile.compile(file_py, file_pyc, None, True)
            except py_compile.PyCompileError as err:
                print(err.msg)
            fname = file_pyc
        else:
            fname = file_pyc
        archivename = os.path.split(fname)[1]
        if basename:
            archivename = "%s/%s" % (basename, archivename)
        return (fname, archivename)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001401
1402
def main(args=None):
    """Run the command-line interface of this module.

    "args" is the list of command-line arguments without the program
    name; when None, sys.argv[1:] is used.  Supported forms:

        -l zipfile.zip          list the archive contents
        -t zipfile.zip          test the archive for corrupted members
        -e zipfile.zip target   extract the archive into directory target
        -c zipfile.zip src ...  create an archive from files/directories

    On a missing/unknown option or a wrong argument count the usage text
    is printed and SystemExit(1) is raised.  The archive is always closed,
    even when an operation raises.
    """
    import textwrap
    USAGE = textwrap.dedent("""\
        Usage:
            zipfile.py -l zipfile.zip        # Show listing of a zipfile
            zipfile.py -t zipfile.zip        # Test if a zipfile is valid
            zipfile.py -e zipfile.zip target # Extract zipfile into target dir
            zipfile.py -c zipfile.zip src ... # Create zipfile from sources
        """)

    def usage_exit():
        # Shared failure path for every argument-validation error.
        print(USAGE)
        sys.exit(1)

    if args is None:
        args = sys.argv[1:]

    if not args or args[0] not in ('-l', '-c', '-e', '-t'):
        usage_exit()

    if args[0] == '-l':
        if len(args) != 2:
            usage_exit()
        zf = ZipFile(args[1], 'r')
        try:
            zf.printdir()
        finally:
            zf.close()

    elif args[0] == '-t':
        if len(args) != 2:
            usage_exit()
        zf = ZipFile(args[1], 'r')
        try:
            badfile = zf.testzip()
        finally:
            # Previously the archive was never closed on this branch.
            zf.close()
        if badfile:
            print("The following enclosed file is corrupted: {!r}".format(badfile))
        print("Done testing")

    elif args[0] == '-e':
        if len(args) != 3:
            usage_exit()

        zf = ZipFile(args[1], 'r')
        out = args[2]
        try:
            # NOTE(review): member names are joined to the target as-is;
            # absolute names or ".." components from an untrusted archive
            # can escape "out".  Only extract archives you trust.
            for path in zf.namelist():
                if path.startswith('./'):
                    tgt = os.path.join(out, path[2:])
                else:
                    tgt = os.path.join(out, path)

                if path.endswith('/'):
                    # Directory entry: create it instead of trying to
                    # open it as a file (which would raise).
                    if not os.path.isdir(tgt):
                        os.makedirs(tgt)
                    continue

                tgtdir = os.path.dirname(tgt)
                # An empty dirname means the current directory.
                if tgtdir and not os.path.exists(tgtdir):
                    os.makedirs(tgtdir)
                with open(tgt, 'wb') as fp:
                    fp.write(zf.read(path))
        finally:
            zf.close()

    elif args[0] == '-c':
        if len(args) < 3:
            usage_exit()

        def addToZip(zf, path, zippath):
            # Recursively add a file or a directory tree; anything that is
            # neither a regular file nor a directory is ignored.
            if os.path.isfile(path):
                zf.write(path, zippath, ZIP_DEFLATED)
            elif os.path.isdir(path):
                for nm in os.listdir(path):
                    addToZip(zf,
                             os.path.join(path, nm), os.path.join(zippath, nm))
            # else: ignore

        zf = ZipFile(args[1], 'w', allowZip64=True)
        try:
            for src in args[2:]:
                addToZip(zf, src, os.path.basename(src))
        finally:
            zf.close()
1476
# Run the command-line interface only when executed as a script,
# never on import.
if __name__ == "__main__":
    main()