blob: 3d2d57b2058fb5d16b2ee21a66158e9e82da4227 [file] [log] [blame]
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001"""
2Read and write ZIP files.
Guido van Rossumd6ca5462007-05-22 01:29:33 +00003
4XXX references to utf-8 need further investigation.
Thomas Wouters0e3f5912006-08-11 14:57:12 +00005"""
Christian Heimes790c8232008-01-07 21:14:23 +00006import struct, os, time, sys, shutil
Martin v. Löwis59e47792009-01-24 14:10:07 +00007import binascii, io, stat
Guido van Rossum32abe6f2000-03-31 17:30:02 +00008
# zlib is optional: when it is importable we use its crc32 (and may need its
# compression method); otherwise binascii supplies the CRC-32 routine and
# the module-level name ``zlib`` is set to None so callers can test for it.
try:
    import zlib
except ImportError:
    zlib = None
    crc32 = binascii.crc32
else:
    crc32 = zlib.crc32
Guido van Rossum32abe6f2000-03-31 17:30:02 +000015
# Names exported by ``from zipfile import *``.
__all__ = ["BadZipfile", "error", "ZIP_STORED", "ZIP_DEFLATED", "is_zipfile",
           "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile" ]
Skip Montanaro40fc1602001-03-01 04:27:19 +000018
class BadZipfile(Exception):
    """Error raised by this module for problems with a ZIP archive."""
Thomas Wouters0e3f5912006-08-11 14:57:12 +000021
22
class LargeZipFile(Exception):
    """Signal that an archive being written needs ZIP64 but may not use it.

    Raised when writing a zipfile, the zipfile requires ZIP64 extensions
    and those extensions are disabled.
    """
28
error = BadZipfile      # The exception raised by this module

# Sizes above ZIP64_LIMIT make ZipInfo.FileHeader() fall back to the ZIP64
# extra field.  The other two limits are not referenced in this part of the
# file; presumably they bound the entry count and the archive comment length
# of a classic (non-ZIP64) archive -- TODO confirm against their users.
ZIP64_LIMIT = (1 << 31) - 1
ZIP_FILECOUNT_LIMIT = 1 << 16
ZIP_MAX_COMMENT = (1 << 16) - 1

# constants for Zip file compression methods
ZIP_STORED = 0
ZIP_DEFLATED = 8
# Other ZIP compression methods not supported
39
# Below are some formats and associated data for reading/writing headers using
# the struct module.  The names and structures of headers/records are those used
# in the PKWARE description of the ZIP file format:
#     http://www.pkware.com/documents/casestudies/APPNOTE.TXT
# (URL valid as of January 2008)

# The "end of central directory" structure, magic number, size, and indices
# (section V.I in the format document)
structEndArchive = b"<4s4H2LH"
stringEndArchive = b"PK\005\006"
sizeEndCentDir = struct.calcsize(structEndArchive)

# Positions of the fields in the list obtained by unpacking a record with
# structEndArchive (see _EndRecData).
_ECD_SIGNATURE = 0
_ECD_DISK_NUMBER = 1
_ECD_DISK_START = 2
_ECD_ENTRIES_THIS_DISK = 3
_ECD_ENTRIES_TOTAL = 4
_ECD_SIZE = 5
_ECD_OFFSET = 6
_ECD_COMMENT_SIZE = 7
# These last two indices are not part of the structure as defined in the
# spec, but they are used internally by this module as a convenience
_ECD_COMMENT = 8
_ECD_LOCATION = 9
64
# The "central directory" structure, magic number, size, and indices
# of entries in the structure (section V.F in the format document)
structCentralDir = "<4s4B4HL2L5H2L"
stringCentralDir = b"PK\001\002"
sizeCentralDir = struct.calcsize(structCentralDir)

# indexes of entries in the central directory structure
# (one index per field of structCentralDir, in unpacking order)
_CD_SIGNATURE = 0
_CD_CREATE_VERSION = 1
_CD_CREATE_SYSTEM = 2
_CD_EXTRACT_VERSION = 3
_CD_EXTRACT_SYSTEM = 4
_CD_FLAG_BITS = 5
_CD_COMPRESS_TYPE = 6
_CD_TIME = 7
_CD_DATE = 8
_CD_CRC = 9
_CD_COMPRESSED_SIZE = 10
_CD_UNCOMPRESSED_SIZE = 11
_CD_FILENAME_LENGTH = 12
_CD_EXTRA_FIELD_LENGTH = 13
_CD_COMMENT_LENGTH = 14
_CD_DISK_NUMBER_START = 15
_CD_INTERNAL_FILE_ATTRIBUTES = 16
_CD_EXTERNAL_FILE_ATTRIBUTES = 17
_CD_LOCAL_HEADER_OFFSET = 18
91
# The "local file header" structure, magic number, size, and indices
# (section V.A in the format document)
# ZipInfo.FileHeader() packs its fixed-size part with structFileHeader.
structFileHeader = "<4s2B4HL2L2H"
stringFileHeader = b"PK\003\004"
sizeFileHeader = struct.calcsize(structFileHeader)

# Positions of the fields of structFileHeader, in packing order.
_FH_SIGNATURE = 0
_FH_EXTRACT_VERSION = 1
_FH_EXTRACT_SYSTEM = 2
_FH_GENERAL_PURPOSE_FLAG_BITS = 3
_FH_COMPRESSION_METHOD = 4
_FH_LAST_MOD_TIME = 5
_FH_LAST_MOD_DATE = 6
_FH_CRC = 7
_FH_COMPRESSED_SIZE = 8
_FH_UNCOMPRESSED_SIZE = 9
_FH_FILENAME_LENGTH = 10
_FH_EXTRA_FIELD_LENGTH = 11
110
# The "Zip64 end of central directory locator" structure, magic number, and size
# (_EndRecData64 unpacks it as: signature, disk number, relative offset,
# number of disks)
structEndArchive64Locator = "<4sLQL"
stringEndArchive64Locator = b"PK\x06\x07"
sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator)

# The "Zip64 end of central directory" record, magic number, size, and indices
# (section V.G in the format document)
structEndArchive64 = "<4sQ2H2L4Q"
stringEndArchive64 = b"PK\x06\x06"
sizeEndCentDir64 = struct.calcsize(structEndArchive64)

# Positions of the fields of structEndArchive64, in unpacking order.
_CD64_SIGNATURE = 0
_CD64_DIRECTORY_RECSIZE = 1
_CD64_CREATE_VERSION = 2
_CD64_EXTRACT_VERSION = 3
_CD64_DISK_NUMBER = 4
_CD64_DISK_NUMBER_START = 5
_CD64_NUMBER_ENTRIES_THIS_DISK = 6
_CD64_NUMBER_ENTRIES_TOTAL = 7
_CD64_DIRECTORY_SIZE = 8
_CD64_OFFSET_START_CENTDIR = 9
131_CD64_OFFSET_START_CENTDIR = 9
132
def _check_zipfile(fp):
    """Return True if an end-of-central-directory record is found in *fp*.

    An IOError raised while probing the file object is treated as
    "not a ZIP file" and yields False.
    """
    try:
        found = bool(_EndRecData(fp))   # truthy => correct magic number
    except IOError:
        found = False
    return found
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000140
def is_zipfile(filename):
    """Quickly see if a file is a ZIP file by checking the magic number.

    *filename* may be a path, or an object with a ``read`` attribute, which
    is then used directly as the file object.  Returns False when the path
    cannot be opened or an IOError occurs while checking.
    """
    verdict = False
    try:
        if not hasattr(filename, "read"):
            # A path: open it ourselves so it is closed again afterwards.
            with open(filename, "rb") as stream:
                verdict = _check_zipfile(stream)
        else:
            verdict = _check_zipfile(fp=filename)
    except IOError:
        pass
    return verdict
156
def _EndRecData64(fpin, offset, endrec):
    """Read the ZIP64 end-of-archive records and use that to update endrec.

    fpin   -- file object positioned anywhere; it is re-seeked here.
    offset -- position of the classic "end of central directory" record,
              expressed relative to the end of the file (so <= 0).
    endrec -- list built by _EndRecData(); updated in place when a ZIP64
              record is found.

    Returns endrec (unchanged if the file has no usable ZIP64 records).
    Raises BadZipfile for archives that span multiple disks.
    """
    try:
        fpin.seek(offset - sizeEndCentDir64Locator, 2)
    except IOError:
        # The file is too small to hold a ZIP64 locator in front of the
        # end-of-central-directory record (e.g. an empty archive), so there
        # is nothing to merge.
        return endrec

    data = fpin.read(sizeEndCentDir64Locator)
    if len(data) != sizeEndCentDir64Locator:
        # Short read: cannot be a complete locator.
        return endrec
    sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, data)
    if sig != stringEndArchive64Locator:
        return endrec

    if diskno != 0 or disks != 1:
        raise BadZipfile("zipfiles that span multiple disks are not supported")

    # Assume no 'zip64 extensible data'
    fpin.seek(offset - sizeEndCentDir64Locator - sizeEndCentDir64, 2)
    data = fpin.read(sizeEndCentDir64)
    if len(data) != sizeEndCentDir64:
        # Short read: cannot be a complete ZIP64 end-of-directory record.
        return endrec
    sig, sz, create_version, read_version, disk_num, disk_dir, \
            dircount, dircount2, dirsize, diroffset = \
            struct.unpack(structEndArchive64, data)
    if sig != stringEndArchive64:
        return endrec

    # Update the original endrec using data from the ZIP64 record
    endrec[_ECD_SIGNATURE] = sig
    endrec[_ECD_DISK_NUMBER] = disk_num
    endrec[_ECD_DISK_START] = disk_dir
    endrec[_ECD_ENTRIES_THIS_DISK] = dircount
    endrec[_ECD_ENTRIES_TOTAL] = dircount2
    endrec[_ECD_SIZE] = dirsize
    endrec[_ECD_OFFSET] = diroffset
    return endrec
188
189
def _EndRecData(fpin):
    """Return data from the "End of Central Directory" record, or None.

    The data is a list of the nine items in the ZIP "End of central dir"
    record followed by a tenth item, the file seek offset of this record.
    None is returned when no complete, self-consistent record is found."""

    # Determine file size
    fpin.seek(0, 2)
    filesize = fpin.tell()

    # Check to see if this is ZIP file with no archive comment (the
    # "end of central directory" structure should be the last item in the
    # file if this is the case).
    try:
        fpin.seek(-sizeEndCentDir, 2)
    except IOError:
        return None
    data = fpin.read()
    # The length check guards struct.unpack against file objects that clamp
    # the seek instead of failing and therefore hand back a short record.
    if (len(data) == sizeEndCentDir and
            data[0:4] == stringEndArchive and
            data[-2:] == b"\000\000"):
        # the signature is correct and there's no comment, unpack structure
        endrec = list(struct.unpack(structEndArchive, data))

        # Append a blank comment and record start offset
        endrec.append(b"")
        endrec.append(filesize - sizeEndCentDir)

        # Try to read the "Zip64 end of central directory" structure
        return _EndRecData64(fpin, -sizeEndCentDir, endrec)

    # Either this is not a ZIP file, or it is a ZIP file with an archive
    # comment.  Search the end of the file for the "end of central directory"
    # record signature. The comment is the last item in the ZIP file and may be
    # up to 64K long.  It is assumed that the "end of central directory" magic
    # number does not appear in the comment.
    maxCommentStart = max(filesize - (1 << 16) - sizeEndCentDir, 0)
    fpin.seek(maxCommentStart, 0)
    data = fpin.read()
    start = data.rfind(stringEndArchive)
    if start >= 0:
        # found the magic number; attempt to unpack and interpret
        recData = data[start:start+sizeEndCentDir]
        if len(recData) != sizeEndCentDir:
            # The signature sits too close to the end of the file for a
            # complete record to follow it.
            return None
        endrec = list(struct.unpack(structEndArchive, recData))
        comment = data[start+sizeEndCentDir:]
        # check that comment length is correct
        if endrec[_ECD_COMMENT_SIZE] == len(comment):
            # Append the archive comment and start offset
            endrec.append(comment)
            endrec.append(maxCommentStart + start)

            # Try to read the "Zip64 end of central directory" structure
            return _EndRecData64(fpin, maxCommentStart + start - filesize,
                                 endrec)

    # Unable to find a valid end of central directory structure
    return None
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000246
Fred Drake484d7352000-10-02 21:14:52 +0000247
class ZipInfo (object):
    """Class with attributes describing each file in the ZIP archive."""

    __slots__ = (
            'orig_filename',
            'filename',
            'date_time',
            'compress_type',
            'comment',
            'extra',
            'create_system',
            'create_version',
            'extract_version',
            'reserved',
            'flag_bits',
            'volume',
            'internal_attr',
            'external_attr',
            'header_offset',
            'CRC',
            'compress_size',
            'file_size',
            '_raw_time',
        )

    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
        """Create the description of one archive member.

        filename  -- member name; cut at the first null byte and normalized
                     to forward slashes.
        date_time -- (year, month, day, hour, min, sec) tuple.
        """
        self.orig_filename = filename   # Original file name in archive

        # Terminate the file name at the first null byte.  Null bytes in file
        # names are used as tricks by viruses in archives.
        null_byte = filename.find(chr(0))
        if null_byte >= 0:
            filename = filename[0:null_byte]
        # This is used to ensure paths in generated ZIP files always use
        # forward slashes as the directory separator, as required by the
        # ZIP format specification.
        if os.sep != "/" and os.sep in filename:
            filename = filename.replace(os.sep, "/")

        self.filename = filename        # Normalized file name
        self.date_time = date_time      # year, month, day, hour, min, sec
        # Standard values:
        self.compress_type = ZIP_STORED # Type of compression for the file
        self.comment = b""              # Comment for each file
        self.extra = b""                # ZIP extra data
        if sys.platform == 'win32':
            self.create_system = 0          # System which created ZIP archive
        else:
            # Assume everything else is unix-y
            self.create_system = 3          # System which created ZIP archive
        self.create_version = 20        # Version which created ZIP archive
        self.extract_version = 20       # Version needed to extract archive
        self.reserved = 0               # Must be zero
        self.flag_bits = 0              # ZIP flag bits
        self.volume = 0                 # Volume number of file header
        self.internal_attr = 0          # Internal attributes
        self.external_attr = 0          # External file attributes
        # Other attributes are set by class ZipFile:
        # header_offset         Byte offset to the file header
        # CRC                   CRC-32 of the uncompressed file
        # compress_size         Size of the compressed file
        # file_size             Size of the uncompressed file

    def FileHeader(self):
        """Return the per-file (local) header as a bytes object.

        The result is the packed fixed-size header followed by the encoded
        file name and the extra field.  When a size exceeds ZIP64_LIMIT a
        ZIP64 extra record is appended and extract_version/create_version
        are raised to at least 45 on this object.
        """
        dt = self.date_time
        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
        if self.flag_bits & 0x08:
            # Set these to zero because we write them after the file data
            CRC = compress_size = file_size = 0
        else:
            CRC = self.CRC
            compress_size = self.compress_size
            file_size = self.file_size

        extra = self.extra

        if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
            # File is larger than what fits into a 4 byte integer,
            # fall back to the ZIP64 extension
            fmt = '<HHQQ'
            extra = extra + struct.pack(fmt,
                    1, struct.calcsize(fmt)-4, file_size, compress_size)
            file_size = 0xffffffff
            compress_size = 0xffffffff
            # Each version field is raised independently; previously
            # create_version was (mistakenly) derived from extract_version.
            self.extract_version = max(45, self.extract_version)
            self.create_version = max(45, self.create_version)

        filename, flag_bits = self._encodeFilenameFlags()
        header = struct.pack(structFileHeader, stringFileHeader,
                 self.extract_version, self.reserved, flag_bits,
                 self.compress_type, dostime, dosdate, CRC,
                 compress_size, file_size,
                 len(filename), len(extra))
        return header + filename + extra

    def _encodeFilenameFlags(self):
        """Return (encoded file name, flag bits) for writing a header.

        ASCII is used when the name allows it; otherwise the name is encoded
        as UTF-8 and bit 0x800 is added to the returned flags.
        """
        try:
            return self.filename.encode('ascii'), self.flag_bits
        except UnicodeEncodeError:
            return self.filename.encode('utf-8'), self.flag_bits | 0x800

    def _decodeExtra(self):
        """Decode the extra field and apply any ZIP64 record it contains.

        For a record of type 1 (ZIP64), each of file_size, compress_size and
        header_offset that currently holds the 0xffffffff placeholder is
        replaced, in that order, by the next 64-bit value of the record.
        Raises RuntimeError for a ZIP64 record with an unsupported length.
        """
        extra = self.extra
        unpack = struct.unpack
        # Every record starts with a 4-byte (type, length) header; stop on
        # trailing bytes that are too short to form one instead of letting
        # struct.unpack fail on them.
        while len(extra) >= 4:
            tp, ln = unpack('<HH', extra[:4])
            if tp == 1:
                if ln >= 24:
                    counts = unpack('<QQQ', extra[4:28])
                elif ln == 16:
                    counts = unpack('<QQ', extra[4:20])
                elif ln == 8:
                    counts = unpack('<Q', extra[4:12])
                elif ln == 0:
                    counts = ()
                else:
                    raise RuntimeError("Corrupt extra field %s"%(ln,))

                idx = 0

                # ZIP64 extension (large files and/or large archives)
                if self.file_size in (0xffffffffffffffff, 0xffffffff):
                    self.file_size = counts[idx]
                    idx += 1

                if self.compress_size == 0xFFFFFFFF:
                    self.compress_size = counts[idx]
                    idx += 1

                if self.header_offset == 0xffffffff:
                    self.header_offset = counts[idx]
                    idx+=1

            extra = extra[ln+4:]
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000386
387
class _ZipDecrypter:
    """Decrypter for the password-based encryption of ZIP archive members.

    ZIP supports a password-based form of encryption. Even though known
    plaintext attacks have been found against it, it is still useful
    to be able to get data out of such a file.

    Usage:
        zd = _ZipDecrypter(mypwd)
        plain_char = zd(cypher_char)
        plain_text = map(zd, cypher_text)
    """

    def _GenerateCRCTable():
        """Build the 256-entry CRC-32 lookup table.

        ZIP encryption uses the CRC32 one-byte primitive for scrambling some
        internal keys. We noticed that a direct implementation is faster than
        relying on binascii.crc32().
        """
        poly = 0xedb88320
        table = []
        for value in range(256):
            for _ in range(8):
                low_bit = value & 1
                value = (value >> 1) & 0x7FFFFFFF
                if low_bit:
                    value ^= poly
            table.append(value)
        return table
    # Computed once, while the class body executes, and shared by instances.
    crctable = _GenerateCRCTable()

    def _crc32(self, ch, crc):
        """Compute the CRC32 primitive on one byte."""
        index = (crc ^ ch) & 0xff
        shifted = (crc >> 8) & 0xffffff
        return shifted ^ self.crctable[index]

    def __init__(self, pwd):
        """Seed the three keys and mix in every byte of the password."""
        self.key0 = 0x12345678
        self.key1 = 0x23456789
        self.key2 = 0x34567890
        for byte in pwd:
            self._UpdateKeys(byte)

    def _UpdateKeys(self, c):
        """Advance the three keys with one (plaintext or password) byte."""
        mask32 = 0xFFFFFFFF
        self.key0 = self._crc32(c, self.key0)
        key1 = (self.key1 + (self.key0 & 0xFF)) & mask32
        self.key1 = (key1 * 134775813 + 1) & mask32
        self.key2 = self._crc32((self.key1 >> 24) & 0xFF, self.key2)

    def __call__(self, c):
        """Decrypt a single character."""
        assert isinstance(c, int)
        k = self.key2 | 2
        keystream_byte = ((k * (k ^ 1)) >> 8) & 0xFF
        plain = c ^ keystream_byte
        # The keys advance on the decrypted byte, not the cipher byte.
        self._UpdateKeys(plain)
        return plain
445
class ZipExtFile:
    """File-like object for reading an archive member.
       Is returned by ZipFile.open().
    """

    def __init__(self, fileobj, zipinfo, decrypt=None):
        """Wrap *fileobj* for reading the member described by *zipinfo*.

        fileobj -- object whose read() supplies the member's raw bytes.
        zipinfo -- provides compress_type, compress_size, filename and,
                   optionally, CRC for the integrity check.
        decrypt -- optional callable mapping one cipher byte to one plain
                   byte (e.g. a _ZipDecrypter); None for unencrypted data.
        """
        self.fileobj = fileobj
        self.decrypter = decrypt
        self.bytes_read = 0         # raw bytes pulled from fileobj so far
        self.rawbuffer = b''        # raw bytes not yet decrypted/decompressed
        self.readbuffer = b''       # decoded bytes not yet returned by read()
        self.linebuffer = b''       # bytes held back by readline()
        self.eof = False
        self.univ_newlines = False
        self.nlSeps = (b"\n", )
        self.lastdiscard = b''      # separator dropped by the last readline()

        self.compress_type = zipinfo.compress_type
        self.compress_size = zipinfo.compress_size

        self.closed = False
        self.mode = "r"
        self.name = zipinfo.filename

        # read from compressed files in 64k blocks
        self.compreadsize = 64*1024
        if self.compress_type == ZIP_DEFLATED:
            self.dc = zlib.decompressobj(-15)

        if hasattr(zipinfo, 'CRC'):
            self._expected_crc = zipinfo.CRC
            self._running_crc = crc32(b'') & 0xffffffff
        else:
            self._expected_crc = None

    def set_univ_newlines(self, univ_newlines):
        """Enable or disable universal-newline handling for readline()."""
        self.univ_newlines = univ_newlines

        # pick line separator char(s) based on universal newlines flag
        self.nlSeps = (b"\n", )
        if self.univ_newlines:
            self.nlSeps = (b"\r\n", b"\r", b"\n")

    def __iter__(self):
        return self

    def __next__(self):
        """Return the next line; raise StopIteration when none is left."""
        nextline = self.readline()
        if not nextline:
            raise StopIteration()

        return nextline

    def close(self):
        """Mark the object as closed (the underlying file is not touched)."""
        self.closed = True

    def _checkfornewline(self):
        """Locate a line separator in linebuffer.

        Returns (index, separator length), or (-1, -1) if none is buffered.
        """
        nl, nllen = -1, -1
        if self.linebuffer:
            # ugly check for cases where half of an \r\n pair was
            # read on the last pass, and the \r was discarded.  In this
            # case we just throw away the \n at the start of the buffer.
            if (self.lastdiscard, self.linebuffer[:1]) == (b'\r', b'\n'):
                self.linebuffer = self.linebuffer[1:]

            for sep in self.nlSeps:
                nl = self.linebuffer.find(sep)
                if nl >= 0:
                    nllen = len(sep)
                    return nl, nllen

        return nl, nllen

    def readline(self, size = -1):
        """Read a line with approx. size. If size is negative,
           read a whole line.

        A complete line is returned terminated by b"\\n" whatever separator
        was found.  When the size limit falls before the separator, or the
        data ends, the partial line is returned without a terminator.
        """
        if size < 0:
            size = sys.maxsize
        elif size == 0:
            return b''

        # check for a newline already in buffer
        nl, nllen = self._checkfornewline()

        if nl >= 0:
            # the next line was already in the buffer
            if size < nl:
                # The size limit is reached before the separator: return the
                # first `size` bytes and keep everything after them buffered.
                # (Cutting at `size` and then dropping `nllen` bytes, as was
                # done before, threw away real data and added a bogus "\n".)
                partial = self.linebuffer[:size]
                self.linebuffer = self.linebuffer[size:]
                self.lastdiscard = b''
                return partial
        else:
            # no line break in buffer - try to read more
            size -= len(self.linebuffer)
            while nl < 0 and size > 0:
                buf = self.read(min(size, 100))
                if not buf:
                    break
                self.linebuffer += buf
                size -= len(buf)

                # check for a newline in buffer
                nl, nllen = self._checkfornewline()

            # we either ran out of bytes in the file, or
            # met the specified size limit without finding a newline,
            # so return current buffer
            if nl < 0:
                s = self.linebuffer
                self.linebuffer = b''
                return s

        buf = self.linebuffer[:nl]
        self.lastdiscard = self.linebuffer[nl:nl + nllen]
        self.linebuffer = self.linebuffer[nl + nllen:]

        # line is always returned with \n as newline char (except possibly
        # for a final incomplete line in the file, which is handled above).
        return buf + b"\n"

    def readlines(self, sizehint = -1):
        """Return a list with all (following) lines. The sizehint parameter
        is ignored in this implementation.
        """
        result = []
        while True:
            line = self.readline()
            if not line: break
            result.append(line)
        return result

    def _update_crc(self, newdata, eof):
        """Fold *newdata* into the running CRC; verify it when *eof* is true.

        Raises BadZipfile if the final CRC differs from the expected one.
        """
        # Update the CRC using the given data.
        if self._expected_crc is None:
            # No need to compute the CRC if we don't have a reference value
            return
        self._running_crc = crc32(newdata, self._running_crc) & 0xffffffff
        # Check the CRC if we're at the end of the file
        if eof and self._running_crc != self._expected_crc:
            raise BadZipfile("Bad CRC-32 for file %r" % self.name)

    def read(self, size = None):
        """Return up to *size* decoded bytes, or all buffered/remaining data
        when *size* is None.

        Raises BadZipfile (via _update_crc) on a CRC mismatch at the end of
        the member.
        """
        # act like file obj and return empty string if size is 0
        if size == 0:
            return b''

        # determine read size
        bytesToRead = self.compress_size - self.bytes_read

        # adjust read size for encrypted files since the first 12 bytes
        # are for the encryption/password information
        if self.decrypter is not None:
            bytesToRead -= 12

        if size is not None and size >= 0:
            if self.compress_type == ZIP_STORED:
                lr = len(self.readbuffer)
                bytesToRead = min(bytesToRead, size - lr)
            elif self.compress_type == ZIP_DEFLATED:
                if len(self.readbuffer) > size:
                    # the user has requested fewer bytes than we've already
                    # pulled through the decompressor; don't read any more
                    bytesToRead = 0
                else:
                    # user will use up the buffer, so read some more
                    lr = len(self.rawbuffer)
                    bytesToRead = min(bytesToRead, self.compreadsize - lr)

        # avoid reading past end of file contents
        if bytesToRead + self.bytes_read > self.compress_size:
            bytesToRead = self.compress_size - self.bytes_read

        # try to read from file (if necessary)
        if bytesToRead > 0:
            data = self.fileobj.read(bytesToRead)
            self.bytes_read += len(data)
            self.rawbuffer += data

        # handle contents of raw buffer
        if self.rawbuffer:
            newdata = self.rawbuffer
            self.rawbuffer = b''

            # decrypt new data if we were given an object to handle that
            if newdata and self.decrypter is not None:
                newdata = bytes(map(self.decrypter, newdata))

            # decompress newly read data if necessary
            if newdata and self.compress_type == ZIP_DEFLATED:
                newdata = self.dc.decompress(newdata)
                self.rawbuffer = self.dc.unconsumed_tail
                if self.eof and len(self.rawbuffer) == 0:
                    # we're out of raw bytes (both from the file and
                    # the local buffer); flush just to make sure the
                    # decompressor is done
                    newdata += self.dc.flush()
                    # prevent decompressor from being used again
                    self.dc = None

            # The member is complete once every compressed byte has been
            # read and nothing is left waiting in the raw buffer.
            self._update_crc(newdata, eof=(
                self.compress_size == self.bytes_read and
                len(self.rawbuffer) == 0))
            self.readbuffer += newdata

        # return what the user asked for
        if size is None or len(self.readbuffer) <= size:
            data = self.readbuffer
            self.readbuffer = b''
        else:
            data = self.readbuffer[:size]
            self.readbuffer = self.readbuffer[size:]

        return data
Guido van Rossumd8faa362007-04-27 19:54:29 +0000661
662
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000663class ZipFile:
Tim Petersa19a1682001-03-29 04:36:09 +0000664 """ Class with methods to open, read, write, close, list zip files.
665
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000666 z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=False)
Tim Petersa19a1682001-03-29 04:36:09 +0000667
Fred Drake3d9091e2001-03-26 15:49:24 +0000668 file: Either the path to the file, or a file-like object.
669 If it is a path, the file will be opened and closed by ZipFile.
670 mode: The mode can be either read "r", write "w" or append "a".
671 compression: ZIP_STORED (no compression) or ZIP_DEFLATED (requires zlib).
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000672 allowZip64: if True ZipFile will create files with ZIP64 extensions when
673 needed, otherwise it will raise an exception when this would
674 be necessary.
675
Fred Drake3d9091e2001-03-26 15:49:24 +0000676 """
Fred Drake484d7352000-10-02 21:14:52 +0000677
Fred Drake90eac282001-02-28 05:29:34 +0000678 fp = None # Set here since __del__ checks it
679
def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=False):
    """Open the ZIP file with mode read "r", write "w" or append "a".

    file: a path (str) or a file-like object.  A path is opened here and
        closed again by close(); a file-like object is used as given and
        is never closed by this class.
    mode: "r", "w" or "a".  When appending to a path that cannot be
        opened for update, a new file is created instead.
    compression: ZIP_STORED or ZIP_DEFLATED (the latter requires zlib).
    allowZip64: remembered for _writecheck(), which refuses oversized
        members when this is false.

    Raises RuntimeError for an unsupported mode or compression method.
    Errors raised while reading the existing directory propagate after
    a file opened here has been closed again.
    """
    if mode not in ("r", "w", "a"):
        raise RuntimeError('ZipFile() requires mode "r", "w", or "a"')

    if compression == ZIP_DEFLATED:
        if not zlib:
            raise RuntimeError(
                "Compression requires the (missing) zlib module")
    elif compression != ZIP_STORED:
        raise RuntimeError("That compression method is not supported")

    self._allowZip64 = allowZip64
    self._didModify = False
    self.debug = 0                  # Level of printing: 0 through 3
    self.NameToInfo = {}            # Find file info given name
    self.filelist = []              # List of ZipInfo instances for archive
    self.compression = compression  # Method of compression
    # mode was validated above, so it is already a single letter.
    self.mode = key = mode
    self.pwd = None
    self.comment = b''

    # Check if we were passed a file-like object
    if isinstance(file, str):
        # No, it's a filename
        self._filePassed = 0
        self.filename = file
        modeDict = {'r': 'rb', 'w': 'wb', 'a': 'r+b'}
        try:
            self.fp = io.open(file, modeDict[mode])
        except IOError:
            if mode != 'a':
                raise
            # Nothing to append to: start a fresh archive instead.
            mode = key = 'w'
            self.fp = io.open(file, modeDict[mode])
    else:
        self._filePassed = 1
        self.fp = file
        self.filename = getattr(file, 'name', None)

    try:
        if key == 'r':
            self._GetContents()
        elif key == 'a':
            try:
                # See if file is a zip file
                self._RealGetContents()
                # seek to start of directory and overwrite
                self.fp.seek(self.start_dir, 0)
            except BadZipfile:
                # file is not a zip file, just append
                self.fp.seek(0, 2)
    except BaseException:
        # Do not leave a half-initialised object holding an open handle.
        # _GetContents() may already have closed and cleared self.fp.
        handle, self.fp = self.fp, None
        if handle is not None and not self._filePassed:
            handle.close()
        raise
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000739
def _GetContents(self):
    """Load the archive directory via _RealGetContents().

    If that raises BadZipfile and the file was opened by this object
    (not passed in by the caller), the file is closed and self.fp is
    cleared before the exception is re-raised.
    """
    try:
        self._RealGetContents()
    except BadZipfile:
        if self._filePassed:
            # The caller owns the file object; leave it untouched.
            raise
        self.fp.close()
        self.fp = None
        raise
750
def _RealGetContents(self):
    """Read in the table of contents for the ZIP file.

    Finds the end-of-archive record with _EndRecData(), reads the whole
    central directory into memory and creates one ZipInfo per entry,
    appending it to self.filelist and registering it in self.NameToInfo.
    Also sets self.comment and self.start_dir.

    Raises BadZipfile when no end record is found, when an entry does
    not start with the central directory signature, or when an entry
    header is shorter than the fixed header size.
    """
    fp = self.fp
    endrec = _EndRecData(fp)
    if not endrec:
        raise BadZipfile("File is not a zip file")
    if self.debug > 1:
        print(endrec)
    size_cd = endrec[_ECD_SIZE]             # bytes in central directory
    offset_cd = endrec[_ECD_OFFSET]         # offset of central directory
    self.comment = endrec[_ECD_COMMENT]     # archive comment

    # "concat" is zero, unless zip was concatenated to another file
    concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
    if endrec[_ECD_SIGNATURE] == stringEndArchive64:
        # If Zip64 extension structures are present, account for them
        concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)

    if self.debug > 2:
        inferred = concat + offset_cd
        print("given, inferred, offset", offset_cd, inferred, concat)
    # self.start_dir:  Position of start of central directory
    self.start_dir = offset_cd + concat
    fp.seek(self.start_dir, 0)
    data = fp.read(size_cd)
    # Parse from an in-memory copy of the directory from here on.
    fp = io.BytesIO(data)
    total = 0
    while total < size_cd:
        centdir = fp.read(sizeCentralDir)
        if centdir[0:4] != stringCentralDir:
            raise BadZipfile("Bad magic number for central directory")
        if len(centdir) != sizeCentralDir:
            # Report damage as BadZipfile rather than letting
            # struct.unpack() fail with struct.error.
            raise BadZipfile("Truncated central directory")
        centdir = struct.unpack(structCentralDir, centdir)
        if self.debug > 2:
            print(centdir)
        filename = fp.read(centdir[_CD_FILENAME_LENGTH])
        # Index 5 is the general purpose flag field (stored below as
        # x.flag_bits).
        flags = centdir[5]
        if flags & 0x800:
            # UTF-8 file names extension
            filename = filename.decode('utf-8')
        else:
            # Historical ZIP filename encoding
            filename = filename.decode('cp437')
        # Create ZipInfo instance to store file information
        x = ZipInfo(filename)
        x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
        x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
        x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
        (x.create_version, x.create_system, x.extract_version, x.reserved,
         x.flag_bits, x.compress_type, t, d,
         x.CRC, x.compress_size, x.file_size) = centdir[1:12]
        x.volume, x.internal_attr, x.external_attr = centdir[15:18]
        # Keep the raw time: open() uses it to check passwords.
        x._raw_time = t
        # Convert date/time code to (year, month, day, hour, min, sec)
        x.date_time = ((d >> 9) + 1980, (d >> 5) & 0xF, d & 0x1F,
                       t >> 11, (t >> 5) & 0x3F, (t & 0x1F) * 2)

        x._decodeExtra()
        # Shift the offset by any data that precedes the archive.
        x.header_offset = x.header_offset + concat
        self.filelist.append(x)
        self.NameToInfo[x.filename] = x

        # update total bytes read from central directory
        total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH]
                 + centdir[_CD_EXTRA_FIELD_LENGTH]
                 + centdir[_CD_COMMENT_LENGTH])

        if self.debug > 2:
            print("total", total)
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000819
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000820
def namelist(self):
    """Return a new list with the name of every archive member, in
    directory order."""
    return [entry.filename for entry in self.filelist]
827
def infolist(self):
    """Return the list of ZipInfo instances describing the archive
    members.

    This is the internal list itself, not a copy.
    """
    entries = self.filelist
    return entries
832
def printdir(self, file=None):
    """Print a table of contents for the zip file.

    One heading line is followed by one line per member showing its
    name, modification time and uncompressed size.  Output goes to
    'file', or to print()'s default stream when it is None.
    """
    heading = "%-46s %19s %12s" % ("File Name", "Modified ", "Size")
    print(heading, file=file)
    for entry in self.filelist:
        stamp = "%d-%02d-%02d %02d:%02d:%02d" % entry.date_time[:6]
        row = "%-46s %s %12d" % (entry.filename, stamp, entry.file_size)
        print(row, file=file)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000841
def testzip(self):
    """Read all the files and check the CRC.

    Returns the name of the first member whose reading raises
    BadZipfile, or None when every member reads cleanly.
    """
    one_mib = 2 ** 20
    for entry in self.filelist:
        try:
            # Read by chunks, to avoid an OverflowError or a
            # MemoryError with very large embedded files.
            stream = self.open(entry.filename, "r")
            while True:
                # Reading to the end is what triggers the CRC-32 check.
                if not stream.read(one_mib):
                    break
        except BadZipfile:
            return entry.filename
    return None
854
def getinfo(self, name):
    """Return the ZipInfo instance registered under 'name'.

    Raises KeyError when the archive has no member of that name.
    """
    entry = self.NameToInfo.get(name)
    if entry is not None:
        return entry
    raise KeyError(
        'There is no item named %r in the archive' % name)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000863
def setpassword(self, pwd):
    """Set default password for encrypted files.

    pwd: the password as a bytes object, or None to clear a previously
        set default.

    Raises TypeError for any other type.  An explicit check is used
    instead of assert so that the validation still runs under -O.
    """
    if pwd is not None and not isinstance(pwd, bytes):
        raise TypeError("pwd: expected bytes, got %s" % type(pwd).__name__)
    self.pwd = pwd
868
def read(self, name, pwd=None):
    """Return the complete contents of member 'name' as bytes.

    The member is opened with self.open(name, "r", pwd) and read to the
    end in one call.
    """
    member = self.open(name, "r", pwd)
    return member.read()
872
def open(self, name, mode="r", pwd=None):
    """Return file-like object for 'name'.

    name: a member name or a ZipInfo instance.
    mode: "r", "U" or "rU"; any mode containing "U" turns on universal
        newline handling on the returned object.
    pwd: password for an encrypted member; falls back to the default
        set with setpassword().

    Raises RuntimeError for a bad mode, a closed archive, a missing
    password or a wrong password, and BadZipfile when the local file
    header is damaged or names a different file than the directory.
    When the archive was opened by path, the private file handle
    opened here is closed again if any of these errors occurs.
    """
    if mode not in ("r", "U", "rU"):
        raise RuntimeError('open() requires mode "r", "U", or "rU"')
    if not self.fp:
        raise RuntimeError(
            "Attempt to read ZIP archive that was already closed")

    # Only open a new file for instances where we were not
    # given a file object in the constructor
    if self._filePassed:
        zef_file = self.fp
    else:
        zef_file = io.open(self.filename, 'rb')

    try:
        # Make sure we have an info object
        if isinstance(name, ZipInfo):
            zinfo = name
        else:
            zinfo = self.getinfo(name)

        zef_file.seek(zinfo.header_offset, 0)

        # Skip the file header:
        fheader = zef_file.read(sizeFileHeader)
        if fheader[0:4] != stringFileHeader:
            raise BadZipfile("Bad magic number for file header")

        fheader = struct.unpack(structFileHeader, fheader)
        fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
        if fheader[_FH_EXTRA_FIELD_LENGTH]:
            zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])

        # The header stores the name as raw bytes: UTF-8 when flag bit
        # 0x800 is set, otherwise the historical cp437 encoding (the
        # same rule _RealGetContents() applies to the directory).
        # Accept either form so that non-ASCII cp437 names are not
        # rejected.
        expected = zinfo.orig_filename
        names_match = fname == expected.encode("utf-8")
        if not names_match and not zinfo.flag_bits & 0x800:
            names_match = fname.decode("cp437") == expected
        if not names_match:
            raise BadZipfile(
                'File name in directory %r and header %r differ.'
                % (zinfo.orig_filename, fname))

        # check for encrypted flag & handle password
        is_encrypted = zinfo.flag_bits & 0x1
        zd = None
        if is_encrypted:
            if not pwd:
                pwd = self.pwd
            if not pwd:
                raise RuntimeError("File %s is encrypted, "
                                   "password required for extraction" % name)

            zd = _ZipDecrypter(pwd)
            # The first 12 bytes in the cypher stream is an encryption
            # header used to strengthen the algorithm. The first 11 bytes
            # are completely random, while the 12th contains the MSB of
            # the CRC, or the MSB of the file time depending on the
            # header type, and is used to check the correctness of the
            # password.
            crypt_header = zef_file.read(12)
            h = list(map(zd, crypt_header[0:12]))
            if zinfo.flag_bits & 0x8:
                # compare against the file time from extended local headers
                check_byte = (zinfo._raw_time >> 8) & 0xff
            else:
                # compare against the CRC otherwise
                check_byte = (zinfo.CRC >> 24) & 0xff
            if h[11] != check_byte:
                raise RuntimeError("Bad password for file", name)

        # build and return a ZipExtFile
        if zd is None:
            zef = ZipExtFile(zef_file, zinfo)
        else:
            zef = ZipExtFile(zef_file, zinfo, zd)

        # set universal newlines on ZipExtFile if necessary
        if "U" in mode:
            zef.set_univ_newlines(True)
    except BaseException:
        # Only the handle opened above is ours to close.
        if not self._filePassed:
            zef_file.close()
        raise
    return zef
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000950
def extract(self, member, path=None, pwd=None):
    """Extract one member and return the path it was written to.

    `member' may be a filename or a ZipInfo object.  The member is
    extracted under its full name below `path', or below the current
    working directory when `path' is None.  `pwd' is passed on for
    encrypted members.
    """
    info = member if isinstance(member, ZipInfo) else self.getinfo(member)
    destination = os.getcwd() if path is None else path
    return self._extract_member(info, destination, pwd)
964
def extractall(self, path=None, members=None, pwd=None):
    """Extract several members by calling extract() on each in turn.

    `members' defaults to everything returned by namelist(); when given
    it must be a subset of that list.  `path' and `pwd' are handed to
    extract() unchanged.
    """
    selected = self.namelist() if members is None else members
    for item in selected:
        self.extract(item, path, pwd)
976
def _extract_member(self, member, targetpath, pwd):
    """Extract the ZipInfo object 'member' to a physical
    file on the path targetpath.

    Member names come from the archive and are not trusted: a drive
    prefix, leading separators and any "", "." or ".." components are
    dropped so the result always lies below targetpath.  Missing parent
    directories are created.  A member whose name ends in "/" is
    created as a directory.

    Returns the path that was created or written.
    """
    # Strip trailing path separator, unless it represents the root.
    if (targetpath[-1:] in (os.path.sep, os.path.altsep)
            and len(os.path.splitdrive(targetpath)[1]) > 1):
        targetpath = targetpath[:-1]

    # Build the destination pathname, replacing forward slashes with
    # platform specific separators.
    arcname = member.filename.replace('/', os.path.sep)
    if os.path.altsep:
        arcname = arcname.replace(os.path.altsep, os.path.sep)
    # Interpret absolute pathnames as relative, and remove the drive
    # letter and every component that could climb out of targetpath.
    arcname = os.path.splitdrive(arcname)[1]
    unsafe_parts = ('', os.path.curdir, os.path.pardir)
    arcname = os.path.sep.join(part for part in arcname.split(os.path.sep)
                               if part not in unsafe_parts)

    targetpath = os.path.normpath(os.path.join(targetpath, arcname))

    # Create all upper directories if necessary.
    upperdirs = os.path.dirname(targetpath)
    if upperdirs and not os.path.exists(upperdirs):
        os.makedirs(upperdirs)

    if member.filename.endswith('/'):
        # Directory entry: nothing to copy.
        if not os.path.isdir(targetpath):
            os.mkdir(targetpath)
        return targetpath

    source = self.open(member, pwd=pwd)
    try:
        # io.open is the builtin open(); both handles are released even
        # if the copy fails part way through.
        with io.open(targetpath, "wb") as target:
            shutil.copyfileobj(source, target)
    finally:
        source.close()

    return targetpath
1013
def _writecheck(self, zinfo):
    """Check for errors before writing a file to the archive.

    Raises RuntimeError when the archive is not writable, already
    closed, or the member asks for an unavailable or unknown
    compression method, and LargeZipFile when the member's size or
    header offset exceeds ZIP64_LIMIT while ZIP64 is not allowed.
    A duplicate name only produces a message, and only in debug mode.
    """
    if zinfo.filename in self.NameToInfo and self.debug:
        # Warning for duplicate names
        print("Duplicate name:", zinfo.filename)
    if self.mode not in ("w", "a"):
        raise RuntimeError('write() requires mode "w" or "a"')
    if not self.fp:
        raise RuntimeError(
            "Attempt to write ZIP archive that was already closed")

    method = zinfo.compress_type
    if method == ZIP_DEFLATED and not zlib:
        raise RuntimeError(
            "Compression requires the (missing) zlib module")
    if method not in (ZIP_STORED, ZIP_DEFLATED):
        raise RuntimeError("That compression method is not supported")

    if zinfo.file_size > ZIP64_LIMIT and not self._allowZip64:
        raise LargeZipFile("Filesize would require ZIP64 extensions")
    if zinfo.header_offset > ZIP64_LIMIT and not self._allowZip64:
        raise LargeZipFile(
            "Zipfile size would require ZIP64 extensions")
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001036
def write(self, filename, arcname=None, compress_type=None):
    """Add the file or directory 'filename' to the archive.

    arcname: name to store it under; defaults to 'filename'.  The drive
        and leading separators are removed, and directories get a
        trailing "/".
    compress_type: overrides the archive's default compression for this
        member when given.

    Raises RuntimeError if the archive is already closed; the checks in
    _writecheck() apply as well.
    """
    if not self.fp:
        raise RuntimeError(
            "Attempt to write to ZIP archive that was already closed")

    st = os.stat(filename)
    isdir = stat.S_ISDIR(st.st_mode)
    date_time = time.localtime(st.st_mtime)[0:6]

    # Create ZipInfo instance to store file information
    if arcname is None:
        arcname = filename
    arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
    while arcname[0] in (os.sep, os.altsep):
        arcname = arcname[1:]
    if isdir:
        arcname += '/'
    zinfo = ZipInfo(arcname, date_time)
    zinfo.external_attr = (st[0] & 0xFFFF) << 16      # Unix attributes
    zinfo.compress_type = (self.compression if compress_type is None
                           else compress_type)
    zinfo.file_size = st.st_size
    zinfo.flag_bits = 0x00
    zinfo.header_offset = self.fp.tell()    # Start of header bytes

    self._writecheck(zinfo)
    self._didModify = True

    if isdir:
        # Directories are stored as an empty member: header only.
        zinfo.file_size = 0
        zinfo.compress_size = 0
        zinfo.CRC = 0
        self.filelist.append(zinfo)
        self.NameToInfo[zinfo.filename] = zinfo
        self.fp.write(zinfo.FileHeader())
        return

    # io.open is the builtin open().
    with io.open(filename, "rb") as src:
        # Must overwrite CRC and sizes with correct data later
        zinfo.CRC = running_crc = 0
        zinfo.compress_size = packed_total = 0
        zinfo.file_size = raw_total = 0
        self.fp.write(zinfo.FileHeader())
        cmpr = None
        if zinfo.compress_type == ZIP_DEFLATED:
            cmpr = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                                    zlib.DEFLATED, -15)
        while True:
            chunk = src.read(1024 * 8)
            if not chunk:
                break
            raw_total += len(chunk)
            running_crc = crc32(chunk, running_crc) & 0xffffffff
            if cmpr:
                chunk = cmpr.compress(chunk)
                packed_total += len(chunk)
            self.fp.write(chunk)

    if cmpr:
        tail = cmpr.flush()
        packed_total += len(tail)
        self.fp.write(tail)
        zinfo.compress_size = packed_total
    else:
        zinfo.compress_size = raw_total
    zinfo.CRC = running_crc
    zinfo.file_size = raw_total

    # Seek backwards and write CRC and file sizes
    resume_at = self.fp.tell()      # Preserve current position in file
    self.fp.seek(zinfo.header_offset + 14, 0)
    self.fp.write(struct.pack("<LLL", zinfo.CRC, zinfo.compress_size,
                              zinfo.file_size))
    self.fp.seek(resume_at, 0)
    self.filelist.append(zinfo)
    self.NameToInfo[zinfo.filename] = zinfo
1117
def writestr(self, zinfo_or_arcname, data):
    """Write 'data' into the archive as one member.

    data: a 'bytes' instance, or a 'str', which is encoded as UTF-8
        first.
    zinfo_or_arcname: a ZipInfo instance describing the member, or just
        its name in the archive.  For a plain name a ZipInfo is created
        with the current local time, the archive's default compression
        and permission bits 0o600.

    Raises RuntimeError if the archive is already closed; the checks in
    _writecheck() apply as well.
    """
    if isinstance(data, str):
        data = data.encode("utf-8")

    if isinstance(zinfo_or_arcname, ZipInfo):
        zinfo = zinfo_or_arcname
    else:
        now = time.localtime(time.time())[:6]
        zinfo = ZipInfo(filename=zinfo_or_arcname, date_time=now)
        zinfo.compress_type = self.compression
        zinfo.external_attr = 0o600 << 16

    if not self.fp:
        raise RuntimeError(
            "Attempt to write to ZIP archive that was already closed")

    zinfo.file_size = len(data)             # Uncompressed size
    zinfo.header_offset = self.fp.tell()    # Start of header data
    self._writecheck(zinfo)
    self._didModify = True
    zinfo.CRC = crc32(data) & 0xffffffff    # CRC-32 checksum

    if zinfo.compress_type != ZIP_DEFLATED:
        zinfo.compress_size = zinfo.file_size
    else:
        deflater = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                                    zlib.DEFLATED, -15)
        data = deflater.compress(data) + deflater.flush()
        zinfo.compress_size = len(data)     # Compressed size

    # Position is read a second time; nothing has been written since
    # the first tell() above.
    zinfo.header_offset = self.fp.tell()    # Start of header data
    self.fp.write(zinfo.FileHeader())
    self.fp.write(data)
    self.fp.flush()
    if zinfo.flag_bits & 0x08:
        # Write CRC and file sizes after the file data
        trailer = struct.pack("<LLL", zinfo.CRC, zinfo.compress_size,
                              zinfo.file_size)
        self.fp.write(trailer)
    self.filelist.append(zinfo)
    self.NameToInfo[zinfo.filename] = zinfo
1160
def __del__(self):
    """Finalizer: delegate to close() in case the user never called
    it."""
    self.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001164
def close(self):
    """Close the file, and for mode "w" and "a" write the ending
    records.

    Does nothing if the archive is already closed.  When the archive
    was modified, the central directory, the ZIP64 end records (if the
    entry count, directory size or directory offset need them) and the
    end-of-archive record with the archive comment are written first.
    The file is closed only if it was opened by this object, and
    self.fp is cleared in every case, including when writing the
    records raises.
    """
    if self.fp is None:
        return

    try:
        if self.mode in ("w", "a") and self._didModify:
            # write ending records
            pos1 = self.fp.tell()
            for zinfo in self.filelist:     # write central directory
                dt = zinfo.date_time
                dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
                dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
                # Values too large for the fixed-width fields go into a
                # ZIP64 extra field; the fixed field is filled with ones.
                extra = []
                if zinfo.file_size > ZIP64_LIMIT \
                        or zinfo.compress_size > ZIP64_LIMIT:
                    extra.append(zinfo.file_size)
                    extra.append(zinfo.compress_size)
                    file_size = 0xffffffff
                    compress_size = 0xffffffff
                else:
                    file_size = zinfo.file_size
                    compress_size = zinfo.compress_size

                if zinfo.header_offset > ZIP64_LIMIT:
                    extra.append(zinfo.header_offset)
                    header_offset = 0xffffffff
                else:
                    header_offset = zinfo.header_offset

                extra_data = zinfo.extra
                if extra:
                    # Append a ZIP64 field to the extra's
                    extra_data = struct.pack(
                        '<HH' + 'Q'*len(extra),
                        1, 8*len(extra), *extra) + extra_data

                    extract_version = max(45, zinfo.extract_version)
                    create_version = max(45, zinfo.create_version)
                else:
                    extract_version = zinfo.extract_version
                    create_version = zinfo.create_version

                try:
                    filename, flag_bits = zinfo._encodeFilenameFlags()
                    centdir = struct.pack(structCentralDir,
                        stringCentralDir, create_version,
                        zinfo.create_system, extract_version, zinfo.reserved,
                        flag_bits, zinfo.compress_type, dostime, dosdate,
                        zinfo.CRC, compress_size, file_size,
                        len(filename), len(extra_data), len(zinfo.comment),
                        0, zinfo.internal_attr, zinfo.external_attr,
                        header_offset)
                except DeprecationWarning:
                    # Dump the values that failed to pack, then re-raise.
                    print((structCentralDir, stringCentralDir, create_version,
                        zinfo.create_system, extract_version, zinfo.reserved,
                        zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
                        zinfo.CRC, compress_size, file_size,
                        len(zinfo.filename), len(extra_data), len(zinfo.comment),
                        0, zinfo.internal_attr, zinfo.external_attr,
                        header_offset), file=sys.stderr)
                    raise
                self.fp.write(centdir)
                self.fp.write(filename)
                self.fp.write(extra_data)
                self.fp.write(zinfo.comment)

            pos2 = self.fp.tell()
            # Write end-of-zip-archive record
            centDirCount = len(self.filelist)
            centDirSize = pos2 - pos1
            centDirOffset = pos1
            if (centDirCount >= ZIP_FILECOUNT_LIMIT or
                    centDirOffset > ZIP64_LIMIT or
                    centDirSize > ZIP64_LIMIT):
                # Need to write the ZIP64 end-of-archive records
                zip64endrec = struct.pack(
                    structEndArchive64, stringEndArchive64,
                    44, 45, 45, 0, 0, centDirCount, centDirCount,
                    centDirSize, centDirOffset)
                self.fp.write(zip64endrec)

                zip64locrec = struct.pack(
                    structEndArchive64Locator,
                    stringEndArchive64Locator, 0, pos2, 1)
                self.fp.write(zip64locrec)
                # Clamp to what the classic end record can hold.
                centDirCount = min(centDirCount, 0xFFFF)
                centDirSize = min(centDirSize, 0xFFFFFFFF)
                centDirOffset = min(centDirOffset, 0xFFFFFFFF)

            # check for valid comment length
            if len(self.comment) > ZIP_MAX_COMMENT:
                if self.debug > 0:
                    print('Archive comment is too long; truncating to %d bytes'
                          % ZIP_MAX_COMMENT)
                self.comment = self.comment[:ZIP_MAX_COMMENT]

            endrec = struct.pack(structEndArchive, stringEndArchive,
                                 0, 0, centDirCount, centDirCount,
                                 centDirSize, centDirOffset, len(self.comment))
            self.fp.write(endrec)
            self.fp.write(self.comment)
            self.fp.flush()
    finally:
        # Always release the handle, even if writing the records failed,
        # so a later close() or __del__ does not retry on a bad file.
        fp, self.fp = self.fp, None
        if not self._filePassed:
            fp.close()
1273
1274
class PyZipFile(ZipFile):
    """Class to create ZIP archives with Python library files and packages."""

    def writepy(self, pathname, basename=""):
        """Add all files from "pathname" to the ZIP archive.

        If pathname is a package directory, search the directory and
        all package subdirectories recursively for all *.py and enter
        the modules into the archive.  If pathname is a plain
        directory, listdir *.py and enter all modules.  Else, pathname
        must be a Python *.py file and the module will be put into the
        archive.  Added modules are normally module.pyo or module.pyc;
        this method will compile the module.py into module.pyc if
        necessary.  If that compilation fails, the error message is
        printed and the module.py source itself is archived instead.

        basename, when non-empty, is the archive directory prefix under
        which the modules are stored.

        Raises RuntimeError if pathname is a plain file whose name does
        not end with ".py".
        """
        if not os.path.isdir(pathname):
            # Single module file.
            if pathname[-3:] != ".py":
                raise RuntimeError(
                      'Files added with writepy() must end with ".py"')
            self._write_module(pathname, basename, "Adding file")
            return

        initname = os.path.join(pathname, "__init__.py")
        if os.path.isfile(initname):
            # This is a package directory, add it
            name = os.path.split(pathname)[1]
            if basename:
                basename = "%s/%s" % (basename, name)
            else:
                basename = name
            if self.debug:
                print("Adding package in", pathname, "as", basename)
            self._write_module(initname, basename)
            dirlist = os.listdir(pathname)
            # __init__.py has just been written; do not add it twice.
            dirlist.remove("__init__.py")
            # Add all *.py files and package subdirectories
            for filename in dirlist:
                path = os.path.join(pathname, filename)
                if os.path.isdir(path):
                    if os.path.isfile(os.path.join(path, "__init__.py")):
                        # This is a package directory, add it
                        self.writepy(path, basename)  # Recursive call
                elif os.path.splitext(filename)[1] == ".py":
                    self._write_module(path, basename)
        else:
            # This is NOT a package directory, add its files at top level
            if self.debug:
                print("Adding files from directory", pathname)
            for filename in os.listdir(pathname):
                if os.path.splitext(filename)[1] == ".py":
                    self._write_module(os.path.join(pathname, filename),
                                       basename)

    def _write_module(self, path_py, basename, verb="Adding"):
        """Archive the module whose source file is path_py.

        path_py must end with ".py".  verb is the leading word(s) of the
        debug message printed when self.debug is set.
        """
        fname, arcname = self._get_codename(path_py[0:-3], basename)
        if self.debug:
            print(verb, arcname)
        self.write(fname, arcname)

    def _get_codename(self, pathname, basename):
        """Return (filename, archivename) for the path.

        Given a module name path, return the correct file path and
        archive name, compiling if necessary.  For example, given
        /python/lib/string, return (/python/lib/string.pyc, string).

        An up-to-date .pyo is preferred, then an up-to-date .pyc;
        otherwise the source is compiled to .pyc.  If the compilation
        fails, the error message is printed and the .py source file is
        returned, so that the caller never archives a missing or stale
        bytecode file.
        """
        file_py = pathname + ".py"
        file_pyc = pathname + ".pyc"
        file_pyo = pathname + ".pyo"
        if os.path.isfile(file_pyo) and \
                os.stat(file_pyo).st_mtime >= os.stat(file_py).st_mtime:
            fname = file_pyo    # Use .pyo file
        elif os.path.isfile(file_pyc) and \
                os.stat(file_pyc).st_mtime >= os.stat(file_py).st_mtime:
            fname = file_pyc    # Existing .pyc is up to date
        else:
            # No bytecode yet, or it is older than the source: (re)compile.
            import py_compile
            if self.debug:
                print("Compiling", file_py)
            try:
                py_compile.compile(file_py, file_pyc, None, True)
            except py_compile.PyCompileError as err:
                print(err.msg)
                # Any .pyc on disk is absent or stale; ship the source.
                fname = file_py
            else:
                fname = file_pyc
        archivename = os.path.split(fname)[1]
        if basename:
            archivename = "%s/%s" % (basename, archivename)
        return (fname, archivename)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001372
1373
def main(args = None):
    """Command-line interface: list, test, extract or create a ZIP file.

    args is the argument list (defaults to sys.argv[1:]); its first
    item selects the action: -l (list), -t (test), -e (extract) or
    -c (create).  On a usage error the usage text is printed and
    sys.exit(1) is called.

    The archive is always closed, even if the action fails part-way.
    When extracting, member names are reduced to their plain path
    components (no drive, no leading separator, no "." or ".."), so
    every file is created inside the target directory; directory
    entries are created as directories.
    """
    import textwrap
    USAGE=textwrap.dedent("""\
        Usage:
            zipfile.py -l zipfile.zip        # Show listing of a zipfile
            zipfile.py -t zipfile.zip        # Test if a zipfile is valid
            zipfile.py -e zipfile.zip target # Extract zipfile into target dir
            zipfile.py -c zipfile.zip src ... # Create zipfile from sources
        """)
    if args is None:
        args = sys.argv[1:]

    if not args or args[0] not in ('-l', '-c', '-e', '-t'):
        print(USAGE)
        sys.exit(1)

    if args[0] == '-l':
        if len(args) != 2:
            print(USAGE)
            sys.exit(1)
        zf = ZipFile(args[1], 'r')
        try:
            zf.printdir()
        finally:
            zf.close()

    elif args[0] == '-t':
        if len(args) != 2:
            print(USAGE)
            sys.exit(1)
        zf = ZipFile(args[1], 'r')
        try:
            badfile = zf.testzip()
        finally:
            zf.close()
        if badfile:
            print("The following enclosed file is corrupted: {!r}".format(badfile))
        print("Done testing")

    elif args[0] == '-e':
        if len(args) != 3:
            print(USAGE)
            sys.exit(1)

        def memberTarget(out, name):
            # Map an archive member name to a path inside `out`.  Dropping
            # the drive and every empty, "." and ".." component keeps
            # hostile names such as "../x" or "/etc/x" from escaping `out`.
            name = name.replace('/', os.sep)
            if os.altsep:
                name = name.replace(os.altsep, os.sep)
            name = os.path.splitdrive(name)[1]
            parts = [part for part in name.split(os.sep)
                     if part not in ('', os.curdir, os.pardir)]
            if not parts:
                return None
            return os.path.join(out, *parts)

        zf = ZipFile(args[1], 'r')
        out = args[2]
        try:
            for path in zf.namelist():
                tgt = memberTarget(out, path)
                if tgt is None:
                    # Name held nothing but separators and dots.
                    continue

                if path.endswith('/'):
                    # Directory entry: there is no file data to write.
                    if not os.path.isdir(tgt):
                        os.makedirs(tgt)
                    continue

                tgtdir = os.path.dirname(tgt)
                # tgtdir is '' when extracting into the current directory.
                if tgtdir and not os.path.exists(tgtdir):
                    os.makedirs(tgtdir)
                with open(tgt, 'wb') as fp:
                    fp.write(zf.read(path))
        finally:
            zf.close()

    elif args[0] == '-c':
        if len(args) < 3:
            print(USAGE)
            sys.exit(1)

        def addToZip(zf, path, zippath):
            # Add a file, or recursively every file below a directory.
            if os.path.isfile(path):
                zf.write(path, zippath, ZIP_DEFLATED)
            elif os.path.isdir(path):
                for nm in os.listdir(path):
                    addToZip(zf,
                            os.path.join(path, nm), os.path.join(zippath, nm))
            # else: ignore

        zf = ZipFile(args[1], 'w', allowZip64=True)
        try:
            for src in args[2:]:
                addToZip(zf, src, os.path.basename(src))
        finally:
            zf.close()
1447
# Run the command-line interface when this file is executed as a script.
if __name__ == "__main__":
    main()