blob: 5b3f6f9603ed33e889954014a9992835e886946d [file] [log] [blame]
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001"""
2Read and write ZIP files.
Guido van Rossumd6ca5462007-05-22 01:29:33 +00003
4XXX references to utf-8 need further investigation.
Thomas Wouters0e3f5912006-08-11 14:57:12 +00005"""
Antoine Pitroua32f9a22010-01-27 21:18:57 +00006import io
Barry Warsaw28a691b2010-04-17 00:19:56 +00007import os
Antoine Pitroua32f9a22010-01-27 21:18:57 +00008import re
Barry Warsaw28a691b2010-04-17 00:19:56 +00009import imp
10import sys
11import time
12import stat
13import shutil
14import struct
15import binascii
16
Guido van Rossum32abe6f2000-03-31 17:30:02 +000017
18try:
Tim Peterse1190062001-01-15 03:34:38 +000019 import zlib # We may need its compression method
Christian Heimesd5e2b6f2008-03-19 21:50:51 +000020 crc32 = zlib.crc32
Guido van Rossum9c673f32001-04-10 15:37:12 +000021except ImportError:
Guido van Rossum32abe6f2000-03-31 17:30:02 +000022 zlib = None
Christian Heimesd5e2b6f2008-03-19 21:50:51 +000023 crc32 = binascii.crc32
Guido van Rossum32abe6f2000-03-31 17:30:02 +000024
# Public names exported by "from zipfile import *".
__all__ = [
    "BadZipFile",
    "BadZipfile",
    "error",
    "ZIP_STORED",
    "ZIP_DEFLATED",
    "is_zipfile",
    "ZipInfo",
    "ZipFile",
    "PyZipFile",
    "LargeZipFile",
]
Skip Montanaro40fc1602001-03-01 04:27:19 +000027
class BadZipFile(Exception):
    """Error raised for archives this module rejects.

    Within this module it is raised, for example, for archives that span
    multiple disks and for members whose CRC-32 does not match.
    """
Thomas Wouters0e3f5912006-08-11 14:57:12 +000030
31
class LargeZipFile(Exception):
    """
    Error raised while writing a zipfile when the archive would need
    ZIP64 extensions but those extensions are disabled.
    """
37
# Pre-3.2 compatibility names: both are plain aliases of BadZipFile.
BadZipfile = BadZipFile
error = BadZipFile
39
Guido van Rossum32abe6f2000-03-31 17:30:02 +000040
# Size thresholds of the classic (non-ZIP64) format.
ZIP64_LIMIT = 0x7FFFFFFF            # (1 << 31) - 1
ZIP_FILECOUNT_LIMIT = 0x10000       # 1 << 16
ZIP_MAX_COMMENT = 0xFFFF            # (1 << 16) - 1

# constants for Zip file compression methods
# (other ZIP compression methods are not supported)
ZIP_STORED = 0
ZIP_DEFLATED = 8

# Below are some formats and associated data for reading/writing headers using
# the struct module.  The names and structures of headers/records are those used
# in the PKWARE description of the ZIP file format:
#     http://www.pkware.com/documents/casestudies/APPNOTE.TXT
# (URL valid as of January 2008)

# The "end of central directory" structure, magic number, size, and indices
# (section V.I in the format document)
structEndArchive = b"<4s4H2LH"
stringEndArchive = b"PK\005\006"
sizeEndCentDir = struct.calcsize(structEndArchive)

# Positions of the fields inside an unpacked "end of central directory"
# record.  The last two (_ECD_COMMENT, _ECD_LOCATION) are not part of the
# structure as defined in the spec; this module appends them to the
# unpacked list as a convenience.
(_ECD_SIGNATURE,
 _ECD_DISK_NUMBER,
 _ECD_DISK_START,
 _ECD_ENTRIES_THIS_DISK,
 _ECD_ENTRIES_TOTAL,
 _ECD_SIZE,
 _ECD_OFFSET,
 _ECD_COMMENT_SIZE,
 _ECD_COMMENT,
 _ECD_LOCATION) = range(10)

# The "central directory" structure, magic number, size, and indices
# of entries in the structure (section V.F in the format document)
structCentralDir = "<4s4B4HL2L5H2L"
stringCentralDir = b"PK\001\002"
sizeCentralDir = struct.calcsize(structCentralDir)

# indexes of entries in the central directory structure
(_CD_SIGNATURE,
 _CD_CREATE_VERSION,
 _CD_CREATE_SYSTEM,
 _CD_EXTRACT_VERSION,
 _CD_EXTRACT_SYSTEM,
 _CD_FLAG_BITS,
 _CD_COMPRESS_TYPE,
 _CD_TIME,
 _CD_DATE,
 _CD_CRC,
 _CD_COMPRESSED_SIZE,
 _CD_UNCOMPRESSED_SIZE,
 _CD_FILENAME_LENGTH,
 _CD_EXTRA_FIELD_LENGTH,
 _CD_COMMENT_LENGTH,
 _CD_DISK_NUMBER_START,
 _CD_INTERNAL_FILE_ATTRIBUTES,
 _CD_EXTERNAL_FILE_ATTRIBUTES,
 _CD_LOCAL_HEADER_OFFSET) = range(19)

# The "local file header" structure, magic number, size, and indices
# (section V.A in the format document)
structFileHeader = "<4s2B4HL2L2H"
stringFileHeader = b"PK\003\004"
sizeFileHeader = struct.calcsize(structFileHeader)

(_FH_SIGNATURE,
 _FH_EXTRACT_VERSION,
 _FH_EXTRACT_SYSTEM,
 _FH_GENERAL_PURPOSE_FLAG_BITS,
 _FH_COMPRESSION_METHOD,
 _FH_LAST_MOD_TIME,
 _FH_LAST_MOD_DATE,
 _FH_CRC,
 _FH_COMPRESSED_SIZE,
 _FH_UNCOMPRESSED_SIZE,
 _FH_FILENAME_LENGTH,
 _FH_EXTRA_FIELD_LENGTH) = range(12)

# The "Zip64 end of central directory locator" structure, magic number, and size
structEndArchive64Locator = "<4sLQL"
stringEndArchive64Locator = b"PK\x06\x07"
sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator)

# The "Zip64 end of central directory" record, magic number, size, and indices
# (section V.G in the format document)
structEndArchive64 = "<4sQ2H2L4Q"
stringEndArchive64 = b"PK\x06\x06"
sizeEndCentDir64 = struct.calcsize(structEndArchive64)

(_CD64_SIGNATURE,
 _CD64_DIRECTORY_RECSIZE,
 _CD64_CREATE_VERSION,
 _CD64_EXTRACT_VERSION,
 _CD64_DISK_NUMBER,
 _CD64_DISK_NUMBER_START,
 _CD64_NUMBER_ENTRIES_THIS_DISK,
 _CD64_NUMBER_ENTRIES_TOTAL,
 _CD64_DIRECTORY_SIZE,
 _CD64_OFFSET_START_CENTDIR) = range(10)
142
def _check_zipfile(fp):
    """Return True if an end-of-central-directory record is found in *fp*.

    An IOError raised while probing the file is treated as "not a ZIP file".
    """
    try:
        # A truthy result means the file has the correct magic number.
        found = bool(_EndRecData(fp))
    except IOError:
        found = False
    return found
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000150
def is_zipfile(filename):
    """Quickly see if a file is a ZIP file by checking the magic number.

    *filename* may be a path, or an already open file / file-like object
    (anything exposing a ``read`` attribute).  Returns False when the file
    cannot be opened or read.
    """
    verdict = False
    try:
        if not hasattr(filename, "read"):
            # A path: open it ourselves and make sure it gets closed again.
            with open(filename, "rb") as stream:
                verdict = _check_zipfile(stream)
        else:
            verdict = _check_zipfile(fp=filename)
    except IOError:
        pass
    return verdict
166
def _EndRecData64(fpin, offset, endrec):
    """
    Read the ZIP64 end-of-archive records and use that to update endrec.

    fpin:   seekable binary file object positioned anywhere.
    offset: position of the classic "end of central directory" record,
            expressed relative to the end of the file (so it is negative).
    endrec: list built by _EndRecData(); updated in place and returned.

    Returns endrec unchanged when no ZIP64 locator/record is present or
    when the file is too short to hold one.  Raises BadZipFile for
    archives that span multiple disks.
    """
    try:
        fpin.seek(offset - sizeEndCentDir64Locator, 2)
    except IOError:
        # If the seek fails, the file is not large enough to contain a ZIP64
        # end-of-archive record, so just return the end record we were given.
        return endrec

    data = fpin.read(sizeEndCentDir64Locator)
    if len(data) != sizeEndCentDir64Locator:
        # Short read: there is no room for a locator, and unpacking a
        # truncated buffer would raise struct.error.
        return endrec
    sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, data)
    if sig != stringEndArchive64Locator:
        return endrec

    if diskno != 0 or disks != 1:
        raise BadZipFile("zipfiles that span multiple disks are not supported")

    # Assume no 'zip64 extensible data'
    fpin.seek(offset - sizeEndCentDir64Locator - sizeEndCentDir64, 2)
    data = fpin.read(sizeEndCentDir64)
    if len(data) != sizeEndCentDir64:
        # Truncated ZIP64 record: keep the values of the classic record.
        return endrec
    sig, sz, create_version, read_version, disk_num, disk_dir, \
        dircount, dircount2, dirsize, diroffset = \
        struct.unpack(structEndArchive64, data)
    if sig != stringEndArchive64:
        return endrec

    # Update the original endrec using data from the ZIP64 record
    endrec[_ECD_SIGNATURE] = sig
    endrec[_ECD_DISK_NUMBER] = disk_num
    endrec[_ECD_DISK_START] = disk_dir
    endrec[_ECD_ENTRIES_THIS_DISK] = dircount
    endrec[_ECD_ENTRIES_TOTAL] = dircount2
    endrec[_ECD_SIZE] = dirsize
    endrec[_ECD_OFFSET] = diroffset
    return endrec
204
205
def _EndRecData(fpin):
    """Return data from the "End of Central Directory" record, or None.

    The data is a list of the nine items in the ZIP "End of central dir"
    record followed by a tenth item, the file seek offset of this record.

    None is returned when no complete record can be located, including the
    case where the signature is present but the record is truncated.
    """

    # Determine file size
    fpin.seek(0, 2)
    filesize = fpin.tell()

    # Check to see if this is ZIP file with no archive comment (the
    # "end of central directory" structure should be the last item in the
    # file if this is the case).
    try:
        fpin.seek(-sizeEndCentDir, 2)
    except IOError:
        return None
    data = fpin.read()
    # The length test matters for file-like objects that clamp a negative
    # seek instead of failing: they may hand back fewer bytes than a full
    # record, which struct.unpack would reject with struct.error.
    if (len(data) == sizeEndCentDir and
            data[0:4] == stringEndArchive and
            data[-2:] == b"\000\000"):
        # the signature is correct and there's no comment, unpack structure
        endrec = list(struct.unpack(structEndArchive, data))

        # Append a blank comment and record start offset
        endrec.append(b"")
        endrec.append(filesize - sizeEndCentDir)

        # Try to read the "Zip64 end of central directory" structure
        return _EndRecData64(fpin, -sizeEndCentDir, endrec)

    # Either this is not a ZIP file, or it is a ZIP file with an archive
    # comment.  Search the end of the file for the "end of central directory"
    # record signature. The comment is the last item in the ZIP file and may be
    # up to 64K long.  It is assumed that the "end of central directory" magic
    # number does not appear in the comment.
    maxCommentStart = max(filesize - (1 << 16) - sizeEndCentDir, 0)
    fpin.seek(maxCommentStart, 0)
    data = fpin.read()
    start = data.rfind(stringEndArchive)
    if start >= 0:
        # found the magic number; attempt to unpack and interpret
        recData = data[start:start+sizeEndCentDir]
        if len(recData) != sizeEndCentDir:
            # The signature sits too close to the end of the file for a
            # whole record to follow it: the file is corrupted.
            return None
        endrec = list(struct.unpack(structEndArchive, recData))
        commentSize = endrec[_ECD_COMMENT_SIZE]  # as claimed by the zip file
        comment = data[start+sizeEndCentDir:start+sizeEndCentDir+commentSize]
        endrec.append(comment)
        endrec.append(maxCommentStart + start)

        # Try to read the "Zip64 end of central directory" structure
        return _EndRecData64(fpin, maxCommentStart + start - filesize,
                             endrec)

    # Unable to find a valid end of central directory structure
    return None
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000260
Fred Drake484d7352000-10-02 21:14:52 +0000261
class ZipInfo (object):
    """Class with attributes describing each file in the ZIP archive."""

    __slots__ = (
        'orig_filename',
        'filename',
        'date_time',
        'compress_type',
        'comment',
        'extra',
        'create_system',
        'create_version',
        'extract_version',
        'reserved',
        'flag_bits',
        'volume',
        'internal_attr',
        'external_attr',
        'header_offset',
        'CRC',
        'compress_size',
        'file_size',
        '_raw_time',
    )

    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
        """Describe the archive member called *filename*.

        filename:  member name; it is cut at the first null character and
                   os.sep is replaced by "/".
        date_time: (year, month, day, hour, min, sec) tuple.

        Raises ValueError when the year is earlier than 1980.
        """
        self.orig_filename = filename   # Original file name in archive

        # Terminate the file name at the first null byte.  Null bytes in file
        # names are used as tricks by viruses in archives.
        null_byte = filename.find(chr(0))
        if null_byte >= 0:
            filename = filename[0:null_byte]
        # This is used to ensure paths in generated ZIP files always use
        # forward slashes as the directory separator, as required by the
        # ZIP format specification.
        if os.sep != "/" and os.sep in filename:
            filename = filename.replace(os.sep, "/")

        self.filename = filename        # Normalized file name
        self.date_time = date_time      # year, month, day, hour, min, sec

        if date_time[0] < 1980:
            raise ValueError('ZIP does not support timestamps before 1980')

        # Standard values:
        self.compress_type = ZIP_STORED # Type of compression for the file
        self.comment = b""              # Comment for each file
        self.extra = b""                # ZIP extra data
        if sys.platform == 'win32':
            self.create_system = 0          # System which created ZIP archive
        else:
            # Assume everything else is unix-y
            self.create_system = 3          # System which created ZIP archive
        self.create_version = 20        # Version which created ZIP archive
        self.extract_version = 20       # Version needed to extract archive
        self.reserved = 0               # Must be zero
        self.flag_bits = 0              # ZIP flag bits
        self.volume = 0                 # Volume number of file header
        self.internal_attr = 0          # Internal attributes
        self.external_attr = 0          # External file attributes
        # Other attributes are set by class ZipFile:
        # header_offset         Byte offset to the file header
        # CRC                   CRC-32 of the uncompressed file
        # compress_size         Size of the compressed file
        # file_size             Size of the uncompressed file

    def FileHeader(self):
        """Return the per-file header as a bytes object.

        The result is the packed local file header followed by the encoded
        file name and the extra field.  When either size exceeds
        ZIP64_LIMIT, a ZIP64 extra record is appended and extract_version /
        create_version are raised to at least 45 on this object.
        """
        dt = self.date_time
        # MS-DOS packed date and time (seconds are stored halved).
        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
        if self.flag_bits & 0x08:
            # Set these to zero because we write them after the file data
            CRC = compress_size = file_size = 0
        else:
            CRC = self.CRC
            compress_size = self.compress_size
            file_size = self.file_size

        extra = self.extra

        if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
            # File is larger than what fits into a 4 byte integer,
            # fall back to the ZIP64 extension
            fmt = '<HHQQ'
            extra = extra + struct.pack(fmt,
                    1, struct.calcsize(fmt)-4, file_size, compress_size)
            file_size = 0xffffffff
            compress_size = 0xffffffff
            self.extract_version = max(45, self.extract_version)
            # Raise create_version from its own value, so that a higher
            # value chosen by the caller is never replaced by
            # extract_version.
            self.create_version = max(45, self.create_version)

        filename, flag_bits = self._encodeFilenameFlags()
        header = struct.pack(structFileHeader, stringFileHeader,
                 self.extract_version, self.reserved, flag_bits,
                 self.compress_type, dostime, dosdate, CRC,
                 compress_size, file_size,
                 len(filename), len(extra))
        return header + filename + extra

    def _encodeFilenameFlags(self):
        """Return (encoded file name, flag bits) for the header.

        ASCII is used when possible; otherwise the name is encoded as UTF-8
        and bit 0x800 is set in the returned flags.
        """
        try:
            return self.filename.encode('ascii'), self.flag_bits
        except UnicodeEncodeError:
            return self.filename.encode('utf-8'), self.flag_bits | 0x800

    def _decodeExtra(self):
        """Decode the extra field and apply ZIP64 sizes/offset to self.

        Only records of type 1 (ZIP64) are interpreted; every other record
        is skipped.  Raises RuntimeError for a ZIP64 record whose length is
        not one of the accepted values.
        """
        extra = self.extra
        unpack = struct.unpack
        # Each record starts with a 4-byte (type, length) header; stop as
        # soon as fewer than 4 bytes remain so that trailing padding does
        # not make unpack() fail.
        while len(extra) >= 4:
            tp, ln = unpack('<HH', extra[:4])
            if tp == 1:
                if ln >= 24:
                    counts = unpack('<QQQ', extra[4:28])
                elif ln == 16:
                    counts = unpack('<QQ', extra[4:20])
                elif ln == 8:
                    counts = unpack('<Q', extra[4:12])
                elif ln == 0:
                    counts = ()
                else:
                    raise RuntimeError("Corrupt extra field %s"%(ln,))

                idx = 0

                # ZIP64 extension (large files and/or large archives)
                # A field is only present in the record when its 32-bit
                # counterpart holds the all-ones placeholder.
                if self.file_size in (0xffffffffffffffff, 0xffffffff):
                    self.file_size = counts[idx]
                    idx += 1

                if self.compress_size == 0xFFFFFFFF:
                    self.compress_size = counts[idx]
                    idx += 1

                if self.header_offset == 0xffffffff:
                    self.header_offset = counts[idx]
                    idx+=1

            extra = extra[ln+4:]
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000404
405
Thomas Wouterscf297e42007-02-23 15:07:44 +0000406class _ZipDecrypter:
407 """Class to handle decryption of files stored within a ZIP archive.
408
409 ZIP supports a password-based form of encryption. Even though known
410 plaintext attacks have been found against it, it is still useful
Christian Heimesfdab48e2008-01-20 09:06:41 +0000411 to be able to get data out of such a file.
Thomas Wouterscf297e42007-02-23 15:07:44 +0000412
413 Usage:
414 zd = _ZipDecrypter(mypwd)
415 plain_char = zd(cypher_char)
416 plain_text = map(zd, cypher_text)
417 """
418
419 def _GenerateCRCTable():
420 """Generate a CRC-32 table.
421
422 ZIP encryption uses the CRC32 one-byte primitive for scrambling some
423 internal keys. We noticed that a direct implementation is faster than
424 relying on binascii.crc32().
425 """
426 poly = 0xedb88320
427 table = [0] * 256
428 for i in range(256):
429 crc = i
430 for j in range(8):
431 if crc & 1:
432 crc = ((crc >> 1) & 0x7FFFFFFF) ^ poly
433 else:
434 crc = ((crc >> 1) & 0x7FFFFFFF)
435 table[i] = crc
436 return table
437 crctable = _GenerateCRCTable()
438
439 def _crc32(self, ch, crc):
440 """Compute the CRC32 primitive on one byte."""
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000441 return ((crc >> 8) & 0xffffff) ^ self.crctable[(crc ^ ch) & 0xff]
Thomas Wouterscf297e42007-02-23 15:07:44 +0000442
443 def __init__(self, pwd):
444 self.key0 = 305419896
445 self.key1 = 591751049
446 self.key2 = 878082192
447 for p in pwd:
448 self._UpdateKeys(p)
449
450 def _UpdateKeys(self, c):
451 self.key0 = self._crc32(c, self.key0)
452 self.key1 = (self.key1 + (self.key0 & 255)) & 4294967295
453 self.key1 = (self.key1 * 134775813 + 1) & 4294967295
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000454 self.key2 = self._crc32((self.key1 >> 24) & 255, self.key2)
Thomas Wouterscf297e42007-02-23 15:07:44 +0000455
456 def __call__(self, c):
457 """Decrypt a single character."""
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000458 assert isinstance(c, int)
Thomas Wouterscf297e42007-02-23 15:07:44 +0000459 k = self.key2 | 2
460 c = c ^ (((k * (k^1)) >> 8) & 255)
Thomas Wouterscf297e42007-02-23 15:07:44 +0000461 self._UpdateKeys(c)
462 return c
463
Ezio Melotti6a5fc4c2012-11-18 13:20:36 +0200464
# Names of ZIP compression method ids.  ZipExtFile looks the id up here to
# build the message of the NotImplementedError it raises for methods it
# cannot read.
compressor_names = dict((
    (0, 'store'),
    (1, 'shrink'),
    (2, 'reduce'),
    (3, 'reduce'),
    (4, 'reduce'),
    (5, 'reduce'),
    (6, 'implode'),
    (7, 'tokenize'),
    (8, 'deflate'),
    (9, 'deflate64'),
    (10, 'implode'),
    (12, 'bzip2'),
    (14, 'lzma'),
    (18, 'terse'),
    (19, 'lz77'),
    (97, 'wavpack'),
    (98, 'ppmd'),
))
484
485
class ZipExtFile(io.BufferedIOBase):
    """File-like object for reading an archive member.
       Is returned by ZipFile.open().
    """

    # Max size supported by decompressor.  The parentheses are required:
    # "1 << 31 - 1" parses as "1 << 30".
    MAX_N = (1 << 31) - 1

    # Read from compressed files in 4k blocks.
    MIN_READ_SIZE = 4096

    # Search for universal newlines or line chunks.
    PATTERN = re.compile(br'^(?P<chunk>[^\r\n]+)|(?P<newline>\n|\r\n?)')

    def __init__(self, fileobj, mode, zipinfo, decrypter=None,
                 close_fileobj=False):
        """Prepare to read the member described by *zipinfo* from *fileobj*.

        fileobj:       file object the member data is read from.
        mode:          stored as self.mode; a 'U' in it enables universal
                       newline handling in readline().
        zipinfo:       object providing compress_type, compress_size,
                       filename and, optionally, CRC.
        decrypter:     optional callable applied to every byte read.
        close_fileobj: when true, close() also closes *fileobj*.

        Raises NotImplementedError for compression types other than
        ZIP_STORED and ZIP_DEFLATED.
        """
        self._fileobj = fileobj
        self._decrypter = decrypter
        self._close_fileobj = close_fileobj

        self._compress_type = zipinfo.compress_type
        self._compress_size = zipinfo.compress_size
        self._compress_left = zipinfo.compress_size

        if self._compress_type == ZIP_DEFLATED:
            self._decompressor = zlib.decompressobj(-15)
        elif self._compress_type != ZIP_STORED:
            descr = compressor_names.get(self._compress_type)
            if descr:
                raise NotImplementedError("compression type %d (%s)" % (self._compress_type, descr))
            else:
                raise NotImplementedError("compression type %d" % (self._compress_type,))
        self._unconsumed = b''

        self._readbuffer = b''
        self._offset = 0

        self._universal = 'U' in mode
        self.newlines = None

        # Adjust read size for encrypted files since the first 12 bytes
        # are for the encryption/password information.
        if self._decrypter is not None:
            self._compress_left -= 12

        self.mode = mode
        self.name = zipinfo.filename

        if hasattr(zipinfo, 'CRC'):
            self._expected_crc = zipinfo.CRC
            self._running_crc = crc32(b'') & 0xffffffff
        else:
            self._expected_crc = None

    def readline(self, limit=-1):
        """Read and return a line from the stream.

        If limit is specified, at most limit bytes will be read.
        """

        if not self._universal and limit < 0:
            # Shortcut common case - newline found in buffer.
            i = self._readbuffer.find(b'\n', self._offset) + 1
            if i > 0:
                line = self._readbuffer[self._offset: i]
                self._offset = i
                return line

        if not self._universal:
            return io.BufferedIOBase.readline(self, limit)

        line = b''
        while limit < 0 or len(line) < limit:
            readahead = self.peek(2)
            if readahead == b'':
                return line

            #
            # Search for universal newlines or line chunks.
            #
            # The pattern returns either a line chunk or a newline, but not
            # both. Combined with peek(2), we are assured that the sequence
            # '\r\n' is always retrieved completely and never split into
            # separate newlines - '\r', '\n' due to coincidental readaheads.
            #
            match = self.PATTERN.search(readahead)
            newline = match.group('newline')
            if newline is not None:
                if self.newlines is None:
                    self.newlines = []
                if newline not in self.newlines:
                    self.newlines.append(newline)
                self._offset += len(newline)
                return line + b'\n'

            chunk = match.group('chunk')
            if limit >= 0:
                chunk = chunk[: limit - len(line)]

            self._offset += len(chunk)
            line += chunk

        return line

    def peek(self, n=1):
        """Returns buffered bytes without advancing the position."""
        if n > len(self._readbuffer) - self._offset:
            # Fill the buffer through read(), then rewind over what it
            # consumed so the position is unchanged.
            chunk = self.read(n)
            self._offset -= len(chunk)

        # Return up to 512 bytes to reduce allocation overhead for tight loops.
        return self._readbuffer[self._offset: self._offset + 512]

    def readable(self):
        """Return True: this object supports reading."""
        return True

    def read(self, n=-1):
        """Read and return up to n bytes.
        If the argument is omitted, None, or negative, data is read and returned until EOF is reached.
        """
        buf = b''
        if n is None:
            n = -1
        while True:
            if n < 0:
                data = self.read1(n)
            elif n > len(buf):
                data = self.read1(n - len(buf))
            else:
                return buf
            if len(data) == 0:
                return buf
            buf += data

    def _update_crc(self, newdata, eof):
        """Fold *newdata* into the running CRC; verify it when *eof* is true.

        Raises BadZipFile if the final CRC differs from the expected one.
        """
        # Update the CRC using the given data.
        if self._expected_crc is None:
            # No need to compute the CRC if we don't have a reference value
            return
        self._running_crc = crc32(newdata, self._running_crc) & 0xffffffff
        # Check the CRC if we're at the end of the file
        if eof and self._running_crc != self._expected_crc:
            raise BadZipFile("Bad CRC-32 for file %r" % self.name)

    def read1(self, n):
        """Read up to n bytes with at most one read() system call.

        A value of None or a negative value for n means "as much as
        possible" (MAX_N bytes).
        """

        # Simplify algorithm (branching) by transforming negative n to large n.
        # None has to be tested first: comparing None with 0 raises TypeError.
        if n is None or n < 0:
            n = self.MAX_N

        # Bytes available in read buffer.
        len_readbuffer = len(self._readbuffer) - self._offset

        # Read from file.
        if self._compress_left > 0 and n > len_readbuffer + len(self._unconsumed):
            nbytes = n - len_readbuffer - len(self._unconsumed)
            nbytes = max(nbytes, self.MIN_READ_SIZE)
            nbytes = min(nbytes, self._compress_left)

            data = self._fileobj.read(nbytes)
            self._compress_left -= len(data)

            if data and self._decrypter is not None:
                data = bytes(map(self._decrypter, data))

            if self._compress_type == ZIP_STORED:
                self._update_crc(data, eof=(self._compress_left==0))
                self._readbuffer = self._readbuffer[self._offset:] + data
                self._offset = 0
            else:
                # Prepare deflated bytes for decompression.
                self._unconsumed += data

        # Handle unconsumed data.
        if (len(self._unconsumed) > 0 and n > len_readbuffer and
            self._compress_type == ZIP_DEFLATED):
            data = self._decompressor.decompress(
                self._unconsumed,
                max(n - len_readbuffer, self.MIN_READ_SIZE)
            )

            self._unconsumed = self._decompressor.unconsumed_tail
            eof = len(self._unconsumed) == 0 and self._compress_left == 0
            if eof:
                data += self._decompressor.flush()

            self._update_crc(data, eof=eof)
            self._readbuffer = self._readbuffer[self._offset:] + data
            self._offset = 0

        # Read from buffer.
        data = self._readbuffer[self._offset: self._offset + n]
        self._offset += len(data)
        return data

    def close(self):
        """Close this object, and the underlying file if it was handed over."""
        try:
            if self._close_fileobj:
                self._fileobj.close()
        finally:
            super().close()
Guido van Rossumd8faa362007-04-27 19:54:29 +0000688
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000689
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000690class ZipFile:
Tim Petersa19a1682001-03-29 04:36:09 +0000691 """ Class with methods to open, read, write, close, list zip files.
692
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000693 z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=False)
Tim Petersa19a1682001-03-29 04:36:09 +0000694
Fred Drake3d9091e2001-03-26 15:49:24 +0000695 file: Either the path to the file, or a file-like object.
696 If it is a path, the file will be opened and closed by ZipFile.
697 mode: The mode can be either read "r", write "w" or append "a".
698 compression: ZIP_STORED (no compression) or ZIP_DEFLATED (requires zlib).
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000699 allowZip64: if True ZipFile will create files with ZIP64 extensions when
700 needed, otherwise it will raise an exception when this would
701 be necessary.
702
Fred Drake3d9091e2001-03-26 15:49:24 +0000703 """
Fred Drake484d7352000-10-02 21:14:52 +0000704
Fred Drake90eac282001-02-28 05:29:34 +0000705 fp = None # Set here since __del__ checks it
706
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000707 def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=False):
Fred Drake484d7352000-10-02 21:14:52 +0000708 """Open the ZIP file with mode read "r", write "w" or append "a"."""
Guido van Rossumb5a755e2007-07-18 18:15:48 +0000709 if mode not in ("r", "w", "a"):
710 raise RuntimeError('ZipFile() requires mode "r", "w", or "a"')
711
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000712 if compression == ZIP_STORED:
713 pass
714 elif compression == ZIP_DEFLATED:
715 if not zlib:
Collin Winterce36ad82007-08-30 01:19:48 +0000716 raise RuntimeError(
717 "Compression requires the (missing) zlib module")
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000718 else:
Collin Winterce36ad82007-08-30 01:19:48 +0000719 raise RuntimeError("That compression method is not supported")
Guido van Rossumb5a755e2007-07-18 18:15:48 +0000720
721 self._allowZip64 = allowZip64
722 self._didModify = False
Tim Peterse1190062001-01-15 03:34:38 +0000723 self.debug = 0 # Level of printing: 0 through 3
724 self.NameToInfo = {} # Find file info given name
725 self.filelist = [] # List of ZipInfo instances for archive
726 self.compression = compression # Method of compression
Raymond Hettinger2ca7c192005-02-16 09:27:49 +0000727 self.mode = key = mode.replace('b', '')[0]
Thomas Wouterscf297e42007-02-23 15:07:44 +0000728 self.pwd = None
R David Murray51804e92012-04-12 18:44:42 -0400729 self._comment = b''
Tim Petersa19a1682001-03-29 04:36:09 +0000730
Fred Drake3d9091e2001-03-26 15:49:24 +0000731 # Check if we were passed a file-like object
Guido van Rossum3172c5d2007-10-16 18:12:55 +0000732 if isinstance(file, str):
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000733 # No, it's a filename
Fred Drake3d9091e2001-03-26 15:49:24 +0000734 self._filePassed = 0
735 self.filename = file
736 modeDict = {'r' : 'rb', 'w': 'wb', 'a' : 'r+b'}
Thomas Wouterscf297e42007-02-23 15:07:44 +0000737 try:
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000738 self.fp = io.open(file, modeDict[mode])
Thomas Wouterscf297e42007-02-23 15:07:44 +0000739 except IOError:
740 if mode == 'a':
741 mode = key = 'w'
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000742 self.fp = io.open(file, modeDict[mode])
Thomas Wouterscf297e42007-02-23 15:07:44 +0000743 else:
744 raise
Fred Drake3d9091e2001-03-26 15:49:24 +0000745 else:
746 self._filePassed = 1
747 self.fp = file
748 self.filename = getattr(file, 'name', None)
Tim Petersa19a1682001-03-29 04:36:09 +0000749
Antoine Pitrou17babc52012-11-17 23:50:08 +0100750 try:
751 if key == 'r':
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000752 self._RealGetContents()
Antoine Pitrou17babc52012-11-17 23:50:08 +0100753 elif key == 'w':
Georg Brandl268e4d42010-10-14 06:59:45 +0000754 # set the modified flag so central directory gets written
755 # even if no files are added to the archive
756 self._didModify = True
Antoine Pitrou17babc52012-11-17 23:50:08 +0100757 elif key == 'a':
758 try:
759 # See if file is a zip file
760 self._RealGetContents()
761 # seek to start of directory and overwrite
762 self.fp.seek(self.start_dir, 0)
763 except BadZipFile:
764 # file is not a zip file, just append
765 self.fp.seek(0, 2)
766
767 # set the modified flag so central directory gets written
768 # even if no files are added to the archive
769 self._didModify = True
770 else:
771 raise RuntimeError('Mode must be "r", "w" or "a"')
772 except:
773 fp = self.fp
774 self.fp = None
Tim Peters7d3bad62001-04-04 18:56:49 +0000775 if not self._filePassed:
Antoine Pitrou17babc52012-11-17 23:50:08 +0100776 fp.close()
777 raise
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000778
Ezio Melottifaa6b7f2009-12-30 12:34:59 +0000779 def __enter__(self):
780 return self
781
782 def __exit__(self, type, value, traceback):
783 self.close()
784
Tim Peters7d3bad62001-04-04 18:56:49 +0000785 def _RealGetContents(self):
Fred Drake484d7352000-10-02 21:14:52 +0000786 """Read in the table of contents for the ZIP file."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000787 fp = self.fp
Georg Brandl268e4d42010-10-14 06:59:45 +0000788 try:
789 endrec = _EndRecData(fp)
790 except IOError:
Georg Brandl4d540882010-10-28 06:42:33 +0000791 raise BadZipFile("File is not a zip file")
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000792 if not endrec:
Georg Brandl4d540882010-10-28 06:42:33 +0000793 raise BadZipFile("File is not a zip file")
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000794 if self.debug > 1:
Guido van Rossumbe19ed72007-02-09 05:37:30 +0000795 print(endrec)
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000796 size_cd = endrec[_ECD_SIZE] # bytes in central directory
797 offset_cd = endrec[_ECD_OFFSET] # offset of central directory
R David Murray51804e92012-04-12 18:44:42 -0400798 self._comment = endrec[_ECD_COMMENT] # archive comment
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000799
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000800 # "concat" is zero, unless zip was concatenated to another file
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000801 concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
Antoine Pitrou9e4fdf42008-09-05 23:43:02 +0000802 if endrec[_ECD_SIGNATURE] == stringEndArchive64:
803 # If Zip64 extension structures are present, account for them
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000804 concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)
805
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000806 if self.debug > 2:
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000807 inferred = concat + offset_cd
808 print("given, inferred, offset", offset_cd, inferred, concat)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000809 # self.start_dir: Position of start of central directory
810 self.start_dir = offset_cd + concat
811 fp.seek(self.start_dir, 0)
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000812 data = fp.read(size_cd)
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000813 fp = io.BytesIO(data)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000814 total = 0
815 while total < size_cd:
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000816 centdir = fp.read(sizeCentralDir)
Georg Brandl2ee470f2008-07-16 12:55:28 +0000817 if centdir[0:4] != stringCentralDir:
Georg Brandl4d540882010-10-28 06:42:33 +0000818 raise BadZipFile("Bad magic number for central directory")
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000819 centdir = struct.unpack(structCentralDir, centdir)
820 if self.debug > 2:
Guido van Rossumbe19ed72007-02-09 05:37:30 +0000821 print(centdir)
Fred Drake3e038e52001-02-28 17:56:26 +0000822 filename = fp.read(centdir[_CD_FILENAME_LENGTH])
Martin v. Löwis8570f6a2008-05-05 17:44:38 +0000823 flags = centdir[5]
824 if flags & 0x800:
825 # UTF-8 file names extension
826 filename = filename.decode('utf-8')
827 else:
828 # Historical ZIP filename encoding
829 filename = filename.decode('cp437')
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000830 # Create ZipInfo instance to store file information
Martin v. Löwis8570f6a2008-05-05 17:44:38 +0000831 x = ZipInfo(filename)
Fred Drake3e038e52001-02-28 17:56:26 +0000832 x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
833 x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000834 x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000835 (x.create_version, x.create_system, x.extract_version, x.reserved,
836 x.flag_bits, x.compress_type, t, d,
837 x.CRC, x.compress_size, x.file_size) = centdir[1:12]
838 x.volume, x.internal_attr, x.external_attr = centdir[15:18]
839 # Convert date/time code to (year, month, day, hour, min, sec)
Christian Heimesfdab48e2008-01-20 09:06:41 +0000840 x._raw_time = t
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000841 x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
Fred Drake414ca662000-06-13 18:49:53 +0000842 t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000843
844 x._decodeExtra()
845 x.header_offset = x.header_offset + concat
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000846 self.filelist.append(x)
847 self.NameToInfo[x.filename] = x
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000848
849 # update total bytes read from central directory
850 total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH]
851 + centdir[_CD_EXTRA_FIELD_LENGTH]
852 + centdir[_CD_COMMENT_LENGTH])
853
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000854 if self.debug > 2:
Guido van Rossumbe19ed72007-02-09 05:37:30 +0000855 print("total", total)
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000856
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000857
858 def namelist(self):
Fred Drake484d7352000-10-02 21:14:52 +0000859 """Return a list of file names in the archive."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000860 l = []
861 for data in self.filelist:
862 l.append(data.filename)
863 return l
864
865 def infolist(self):
Fred Drake484d7352000-10-02 21:14:52 +0000866 """Return a list of class ZipInfo instances for files in the
867 archive."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000868 return self.filelist
869
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000870 def printdir(self, file=None):
Fred Drake484d7352000-10-02 21:14:52 +0000871 """Print a table of contents for the zip file."""
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000872 print("%-46s %19s %12s" % ("File Name", "Modified ", "Size"),
873 file=file)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000874 for zinfo in self.filelist:
Guido van Rossum7736b5b2008-01-15 21:44:53 +0000875 date = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time[:6]
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000876 print("%-46s %s %12d" % (zinfo.filename, date, zinfo.file_size),
877 file=file)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000878
879 def testzip(self):
Fred Drake484d7352000-10-02 21:14:52 +0000880 """Read all the files and check the CRC."""
Benjamin Peterson4cd6a952008-08-17 20:23:46 +0000881 chunk_size = 2 ** 20
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000882 for zinfo in self.filelist:
883 try:
Benjamin Peterson4cd6a952008-08-17 20:23:46 +0000884 # Read by chunks, to avoid an OverflowError or a
885 # MemoryError with very large embedded files.
Antoine Pitrou17babc52012-11-17 23:50:08 +0100886 with self.open(zinfo.filename, "r") as f:
887 while f.read(chunk_size): # Check CRC-32
888 pass
Georg Brandl4d540882010-10-28 06:42:33 +0000889 except BadZipFile:
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000890 return zinfo.filename
891
892 def getinfo(self, name):
Fred Drake484d7352000-10-02 21:14:52 +0000893 """Return the instance of ZipInfo given 'name'."""
Guido van Rossumb5a755e2007-07-18 18:15:48 +0000894 info = self.NameToInfo.get(name)
895 if info is None:
896 raise KeyError(
897 'There is no item named %r in the archive' % name)
898
899 return info
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000900
Thomas Wouterscf297e42007-02-23 15:07:44 +0000901 def setpassword(self, pwd):
902 """Set default password for encrypted files."""
R. David Murray8d855d82010-12-21 21:53:37 +0000903 if pwd and not isinstance(pwd, bytes):
904 raise TypeError("pwd: expected bytes, got %s" % type(pwd))
905 if pwd:
906 self.pwd = pwd
907 else:
908 self.pwd = None
Thomas Wouterscf297e42007-02-23 15:07:44 +0000909
R David Murray51804e92012-04-12 18:44:42 -0400910 @property
911 def comment(self):
912 """The comment text associated with the ZIP file."""
913 return self._comment
914
915 @comment.setter
916 def comment(self, comment):
917 if not isinstance(comment, bytes):
918 raise TypeError("comment: expected bytes, got %s" % type(comment))
919 # check for valid comment length
920 if len(comment) >= ZIP_MAX_COMMENT:
921 if self.debug:
922 print('Archive comment is too long; truncating to %d bytes'
923 % ZIP_MAX_COMMENT)
924 comment = comment[:ZIP_MAX_COMMENT]
925 self._comment = comment
926 self._didModify = True
927
Thomas Wouterscf297e42007-02-23 15:07:44 +0000928 def read(self, name, pwd=None):
Fred Drake484d7352000-10-02 21:14:52 +0000929 """Return file bytes (as a string) for name."""
Benjamin Petersond285bdb2010-10-31 17:57:22 +0000930 with self.open(name, "r", pwd) as fp:
931 return fp.read()
Guido van Rossumd8faa362007-04-27 19:54:29 +0000932
933 def open(self, name, mode="r", pwd=None):
934 """Return file-like object for 'name'."""
935 if mode not in ("r", "U", "rU"):
Collin Winterce36ad82007-08-30 01:19:48 +0000936 raise RuntimeError('open() requires mode "r", "U", or "rU"')
R. David Murray8d855d82010-12-21 21:53:37 +0000937 if pwd and not isinstance(pwd, bytes):
938 raise TypeError("pwd: expected bytes, got %s" % type(pwd))
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000939 if not self.fp:
Collin Winterce36ad82007-08-30 01:19:48 +0000940 raise RuntimeError(
941 "Attempt to read ZIP archive that was already closed")
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000942
Guido van Rossumd8faa362007-04-27 19:54:29 +0000943 # Only open a new file for instances where we were not
944 # given a file object in the constructor
945 if self._filePassed:
946 zef_file = self.fp
947 else:
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000948 zef_file = io.open(self.filename, 'rb')
Guido van Rossumd8faa362007-04-27 19:54:29 +0000949
Antoine Pitrou17babc52012-11-17 23:50:08 +0100950 try:
951 # Make sure we have an info object
952 if isinstance(name, ZipInfo):
953 # 'name' is already an info object
954 zinfo = name
955 else:
956 # Get info object for name
Łukasz Langaa9f054b2010-11-23 00:15:02 +0000957 zinfo = self.getinfo(name)
Antoine Pitrou17babc52012-11-17 23:50:08 +0100958 zef_file.seek(zinfo.header_offset, 0)
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000959
Antoine Pitrou17babc52012-11-17 23:50:08 +0100960 # Skip the file header:
961 fheader = zef_file.read(sizeFileHeader)
962 if fheader[0:4] != stringFileHeader:
963 raise BadZipFile("Bad magic number for file header")
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000964
Antoine Pitrou17babc52012-11-17 23:50:08 +0100965 fheader = struct.unpack(structFileHeader, fheader)
966 fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
967 if fheader[_FH_EXTRA_FIELD_LENGTH]:
968 zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000969
Antoine Pitrou17babc52012-11-17 23:50:08 +0100970 if zinfo.flag_bits & 0x800:
971 # UTF-8 filename
972 fname_str = fname.decode("utf-8")
973 else:
974 fname_str = fname.decode("cp437")
Georg Brandl5ba11de2011-01-01 10:09:32 +0000975
Antoine Pitrou17babc52012-11-17 23:50:08 +0100976 if fname_str != zinfo.orig_filename:
977 raise BadZipFile(
978 'File name in directory %r and header %r differ.'
979 % (zinfo.orig_filename, fname))
980
981 # check for encrypted flag & handle password
982 is_encrypted = zinfo.flag_bits & 0x1
983 zd = None
984 if is_encrypted:
985 if not pwd:
986 pwd = self.pwd
987 if not pwd:
988 raise RuntimeError("File %s is encrypted, password "
989 "required for extraction" % name)
990
991 zd = _ZipDecrypter(pwd)
992 # The first 12 bytes in the cypher stream is an encryption header
993 # used to strengthen the algorithm. The first 11 bytes are
994 # completely random, while the 12th contains the MSB of the CRC,
995 # or the MSB of the file time depending on the header type
996 # and is used to check the correctness of the password.
997 header = zef_file.read(12)
998 h = list(map(zd, header[0:12]))
999 if zinfo.flag_bits & 0x8:
1000 # compare against the file type from extended local headers
1001 check_byte = (zinfo._raw_time >> 8) & 0xff
1002 else:
1003 # compare against the CRC otherwise
1004 check_byte = (zinfo.CRC >> 24) & 0xff
1005 if h[11] != check_byte:
1006 raise RuntimeError("Bad password for file", name)
1007
1008 return ZipExtFile(zef_file, mode, zinfo, zd,
1009 close_fileobj=not self._filePassed)
1010 except:
Łukasz Langaa9f054b2010-11-23 00:15:02 +00001011 if not self._filePassed:
1012 zef_file.close()
Antoine Pitrou17babc52012-11-17 23:50:08 +01001013 raise
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001014
Christian Heimes790c8232008-01-07 21:14:23 +00001015 def extract(self, member, path=None, pwd=None):
1016 """Extract a member from the archive to the current working directory,
1017 using its full name. Its file information is extracted as accurately
1018 as possible. `member' may be a filename or a ZipInfo object. You can
1019 specify a different directory using `path'.
1020 """
1021 if not isinstance(member, ZipInfo):
1022 member = self.getinfo(member)
1023
1024 if path is None:
1025 path = os.getcwd()
1026
1027 return self._extract_member(member, path, pwd)
1028
1029 def extractall(self, path=None, members=None, pwd=None):
1030 """Extract all members from the archive to the current working
1031 directory. `path' specifies a different directory to extract to.
1032 `members' is optional and must be a subset of the list returned
1033 by namelist().
1034 """
1035 if members is None:
1036 members = self.namelist()
1037
1038 for zipinfo in members:
1039 self.extract(zipinfo, path, pwd)
1040
1041 def _extract_member(self, member, targetpath, pwd):
1042 """Extract the ZipInfo object 'member' to a physical
1043 file on the path targetpath.
1044 """
1045 # build the destination pathname, replacing
1046 # forward slashes to platform specific separators.
Antoine Pitrou3c33e082009-05-04 21:21:36 +00001047 # Strip trailing path separator, unless it represents the root.
1048 if (targetpath[-1:] in (os.path.sep, os.path.altsep)
1049 and len(os.path.splitdrive(targetpath)[1]) > 1):
Christian Heimes790c8232008-01-07 21:14:23 +00001050 targetpath = targetpath[:-1]
1051
1052 # don't include leading "/" from file name if present
Martin v. Löwis59e47792009-01-24 14:10:07 +00001053 if member.filename[0] == '/':
Christian Heimes790c8232008-01-07 21:14:23 +00001054 targetpath = os.path.join(targetpath, member.filename[1:])
1055 else:
1056 targetpath = os.path.join(targetpath, member.filename)
1057
1058 targetpath = os.path.normpath(targetpath)
1059
1060 # Create all upper directories if necessary.
1061 upperdirs = os.path.dirname(targetpath)
1062 if upperdirs and not os.path.exists(upperdirs):
1063 os.makedirs(upperdirs)
1064
Martin v. Löwis59e47792009-01-24 14:10:07 +00001065 if member.filename[-1] == '/':
Martin v. Löwis70ccd162009-05-24 19:47:22 +00001066 if not os.path.isdir(targetpath):
1067 os.mkdir(targetpath)
Martin v. Löwis59e47792009-01-24 14:10:07 +00001068 return targetpath
1069
Antoine Pitrou17babc52012-11-17 23:50:08 +01001070 with self.open(member, pwd=pwd) as source, \
1071 open(targetpath, "wb") as target:
1072 shutil.copyfileobj(source, target)
Christian Heimes790c8232008-01-07 21:14:23 +00001073
1074 return targetpath
1075
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001076 def _writecheck(self, zinfo):
Fred Drake484d7352000-10-02 21:14:52 +00001077 """Check for errors before writing a file to the archive."""
Raymond Hettinger54f02222002-06-01 14:18:47 +00001078 if zinfo.filename in self.NameToInfo:
Tim Peterse1190062001-01-15 03:34:38 +00001079 if self.debug: # Warning for duplicate names
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001080 print("Duplicate name:", zinfo.filename)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001081 if self.mode not in ("w", "a"):
Collin Winterce36ad82007-08-30 01:19:48 +00001082 raise RuntimeError('write() requires mode "w" or "a"')
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001083 if not self.fp:
Collin Winterce36ad82007-08-30 01:19:48 +00001084 raise RuntimeError(
1085 "Attempt to write ZIP archive that was already closed")
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001086 if zinfo.compress_type == ZIP_DEFLATED and not zlib:
Collin Winterce36ad82007-08-30 01:19:48 +00001087 raise RuntimeError(
1088 "Compression requires the (missing) zlib module")
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001089 if zinfo.compress_type not in (ZIP_STORED, ZIP_DEFLATED):
Collin Winterce36ad82007-08-30 01:19:48 +00001090 raise RuntimeError("That compression method is not supported")
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001091 if zinfo.file_size > ZIP64_LIMIT:
1092 if not self._allowZip64:
1093 raise LargeZipFile("Filesize would require ZIP64 extensions")
1094 if zinfo.header_offset > ZIP64_LIMIT:
1095 if not self._allowZip64:
Collin Winterce36ad82007-08-30 01:19:48 +00001096 raise LargeZipFile(
1097 "Zipfile size would require ZIP64 extensions")
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001098
1099 def write(self, filename, arcname=None, compress_type=None):
Fred Drake484d7352000-10-02 21:14:52 +00001100 """Put the bytes from filename into the archive under the name
1101 arcname."""
Guido van Rossumb5a755e2007-07-18 18:15:48 +00001102 if not self.fp:
1103 raise RuntimeError(
1104 "Attempt to write to ZIP archive that was already closed")
1105
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001106 st = os.stat(filename)
Martin v. Löwis59e47792009-01-24 14:10:07 +00001107 isdir = stat.S_ISDIR(st.st_mode)
Raymond Hettinger32200ae2002-06-01 19:51:15 +00001108 mtime = time.localtime(st.st_mtime)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001109 date_time = mtime[0:6]
1110 # Create ZipInfo instance to store file information
1111 if arcname is None:
Georg Brandl8f7c54e2006-02-20 08:40:38 +00001112 arcname = filename
1113 arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
1114 while arcname[0] in (os.sep, os.altsep):
1115 arcname = arcname[1:]
Martin v. Löwis59e47792009-01-24 14:10:07 +00001116 if isdir:
1117 arcname += '/'
Georg Brandl8f7c54e2006-02-20 08:40:38 +00001118 zinfo = ZipInfo(arcname, date_time)
Guido van Rossume2a383d2007-01-15 16:59:06 +00001119 zinfo.external_attr = (st[0] & 0xFFFF) << 16 # Unix attributes
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001120 if compress_type is None:
Tim Peterse1190062001-01-15 03:34:38 +00001121 zinfo.compress_type = self.compression
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001122 else:
Tim Peterse1190062001-01-15 03:34:38 +00001123 zinfo.compress_type = compress_type
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001124
1125 zinfo.file_size = st.st_size
Finn Bock03a3bb82001-09-05 18:40:33 +00001126 zinfo.flag_bits = 0x00
Tim Peterse1190062001-01-15 03:34:38 +00001127 zinfo.header_offset = self.fp.tell() # Start of header bytes
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001128
1129 self._writecheck(zinfo)
1130 self._didModify = True
Martin v. Löwis59e47792009-01-24 14:10:07 +00001131
1132 if isdir:
1133 zinfo.file_size = 0
1134 zinfo.compress_size = 0
1135 zinfo.CRC = 0
1136 self.filelist.append(zinfo)
1137 self.NameToInfo[zinfo.filename] = zinfo
1138 self.fp.write(zinfo.FileHeader())
1139 return
1140
Benjamin Petersonfa0d7032009-06-01 22:42:33 +00001141 with open(filename, "rb") as fp:
1142 # Must overwrite CRC and sizes with correct data later
1143 zinfo.CRC = CRC = 0
1144 zinfo.compress_size = compress_size = 0
1145 zinfo.file_size = file_size = 0
1146 self.fp.write(zinfo.FileHeader())
1147 if zinfo.compress_type == ZIP_DEFLATED:
1148 cmpr = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
1149 zlib.DEFLATED, -15)
1150 else:
1151 cmpr = None
1152 while 1:
1153 buf = fp.read(1024 * 8)
1154 if not buf:
1155 break
1156 file_size = file_size + len(buf)
1157 CRC = crc32(buf, CRC) & 0xffffffff
1158 if cmpr:
1159 buf = cmpr.compress(buf)
1160 compress_size = compress_size + len(buf)
1161 self.fp.write(buf)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001162 if cmpr:
1163 buf = cmpr.flush()
1164 compress_size = compress_size + len(buf)
1165 self.fp.write(buf)
1166 zinfo.compress_size = compress_size
1167 else:
1168 zinfo.compress_size = file_size
1169 zinfo.CRC = CRC
1170 zinfo.file_size = file_size
Finn Bock03a3bb82001-09-05 18:40:33 +00001171 # Seek backwards and write CRC and file sizes
Tim Petersb64bec32001-09-18 02:26:39 +00001172 position = self.fp.tell() # Preserve current position in file
Finn Bock03a3bb82001-09-05 18:40:33 +00001173 self.fp.seek(zinfo.header_offset + 14, 0)
Christian Heimesd5e2b6f2008-03-19 21:50:51 +00001174 self.fp.write(struct.pack("<LLL", zinfo.CRC, zinfo.compress_size,
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001175 zinfo.file_size))
Finn Bock03a3bb82001-09-05 18:40:33 +00001176 self.fp.seek(position, 0)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001177 self.filelist.append(zinfo)
1178 self.NameToInfo[zinfo.filename] = zinfo
1179
Ronald Oussorenee5c8852010-02-07 20:24:02 +00001180 def writestr(self, zinfo_or_arcname, data, compress_type=None):
Guido van Rossum85825dc2007-08-27 17:03:28 +00001181 """Write a file into the archive. The contents is 'data', which
1182 may be either a 'str' or a 'bytes' instance; if it is a 'str',
1183 it is encoded as UTF-8 first.
1184 'zinfo_or_arcname' is either a ZipInfo instance or
Just van Rossumb083cb32002-12-12 12:23:32 +00001185 the name of the file in the archive."""
Guido van Rossum85825dc2007-08-27 17:03:28 +00001186 if isinstance(data, str):
1187 data = data.encode("utf-8")
Just van Rossumb083cb32002-12-12 12:23:32 +00001188 if not isinstance(zinfo_or_arcname, ZipInfo):
1189 zinfo = ZipInfo(filename=zinfo_or_arcname,
Guido van Rossum7736b5b2008-01-15 21:44:53 +00001190 date_time=time.localtime(time.time())[:6])
Just van Rossumb083cb32002-12-12 12:23:32 +00001191 zinfo.compress_type = self.compression
Antoine Pitrou6e1df8d2008-07-25 19:58:18 +00001192 zinfo.external_attr = 0o600 << 16
Just van Rossumb083cb32002-12-12 12:23:32 +00001193 else:
1194 zinfo = zinfo_or_arcname
Guido van Rossumb5a755e2007-07-18 18:15:48 +00001195
1196 if not self.fp:
1197 raise RuntimeError(
1198 "Attempt to write to ZIP archive that was already closed")
1199
Guido van Rossum85825dc2007-08-27 17:03:28 +00001200 zinfo.file_size = len(data) # Uncompressed size
1201 zinfo.header_offset = self.fp.tell() # Start of header data
Ronald Oussorenee5c8852010-02-07 20:24:02 +00001202 if compress_type is not None:
1203 zinfo.compress_type = compress_type
1204
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001205 self._writecheck(zinfo)
1206 self._didModify = True
Christian Heimesd5e2b6f2008-03-19 21:50:51 +00001207 zinfo.CRC = crc32(data) & 0xffffffff # CRC-32 checksum
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001208 if zinfo.compress_type == ZIP_DEFLATED:
1209 co = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
1210 zlib.DEFLATED, -15)
Guido van Rossum85825dc2007-08-27 17:03:28 +00001211 data = co.compress(data) + co.flush()
1212 zinfo.compress_size = len(data) # Compressed size
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001213 else:
1214 zinfo.compress_size = zinfo.file_size
Guido van Rossum85825dc2007-08-27 17:03:28 +00001215 zinfo.header_offset = self.fp.tell() # Start of header data
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001216 self.fp.write(zinfo.FileHeader())
Guido van Rossum85825dc2007-08-27 17:03:28 +00001217 self.fp.write(data)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001218 self.fp.flush()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001219 if zinfo.flag_bits & 0x08:
Tim Peterse1190062001-01-15 03:34:38 +00001220 # Write CRC and file sizes after the file data
Gregory P. Smithe88749b2009-06-26 08:05:13 +00001221 self.fp.write(struct.pack("<LLL", zinfo.CRC, zinfo.compress_size,
Tim Peterse1190062001-01-15 03:34:38 +00001222 zinfo.file_size))
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001223 self.filelist.append(zinfo)
1224 self.NameToInfo[zinfo.filename] = zinfo
1225
1226 def __del__(self):
Fred Drake484d7352000-10-02 21:14:52 +00001227 """Call the "close()" method in case the user forgot."""
Tim Petersd15f8bb2001-11-28 23:16:40 +00001228 self.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001229
1230 def close(self):
Fred Drake484d7352000-10-02 21:14:52 +00001231 """Close the file, and for mode "w" and "a" write the ending
1232 records."""
Tim Petersd15f8bb2001-11-28 23:16:40 +00001233 if self.fp is None:
1234 return
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001235
Antoine Pitrou17babc52012-11-17 23:50:08 +01001236 try:
1237 if self.mode in ("w", "a") and self._didModify: # write ending records
1238 count = 0
1239 pos1 = self.fp.tell()
1240 for zinfo in self.filelist: # write central directory
1241 count = count + 1
1242 dt = zinfo.date_time
1243 dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
1244 dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
1245 extra = []
1246 if zinfo.file_size > ZIP64_LIMIT \
1247 or zinfo.compress_size > ZIP64_LIMIT:
1248 extra.append(zinfo.file_size)
1249 extra.append(zinfo.compress_size)
1250 file_size = 0xffffffff
1251 compress_size = 0xffffffff
1252 else:
1253 file_size = zinfo.file_size
1254 compress_size = zinfo.compress_size
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001255
Antoine Pitrou17babc52012-11-17 23:50:08 +01001256 if zinfo.header_offset > ZIP64_LIMIT:
1257 extra.append(zinfo.header_offset)
1258 header_offset = 0xffffffff
1259 else:
1260 header_offset = zinfo.header_offset
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001261
Antoine Pitrou17babc52012-11-17 23:50:08 +01001262 extra_data = zinfo.extra
1263 if extra:
1264 # Append a ZIP64 field to the extra's
1265 extra_data = struct.pack(
1266 '<HH' + 'Q'*len(extra),
1267 1, 8*len(extra), *extra) + extra_data
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001268
Antoine Pitrou17babc52012-11-17 23:50:08 +01001269 extract_version = max(45, zinfo.extract_version)
1270 create_version = max(45, zinfo.create_version)
1271 else:
1272 extract_version = zinfo.extract_version
1273 create_version = zinfo.create_version
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001274
Antoine Pitrou17babc52012-11-17 23:50:08 +01001275 try:
1276 filename, flag_bits = zinfo._encodeFilenameFlags()
1277 centdir = struct.pack(structCentralDir,
1278 stringCentralDir, create_version,
1279 zinfo.create_system, extract_version, zinfo.reserved,
1280 flag_bits, zinfo.compress_type, dostime, dosdate,
1281 zinfo.CRC, compress_size, file_size,
1282 len(filename), len(extra_data), len(zinfo.comment),
1283 0, zinfo.internal_attr, zinfo.external_attr,
1284 header_offset)
1285 except DeprecationWarning:
1286 print((structCentralDir, stringCentralDir, create_version,
1287 zinfo.create_system, extract_version, zinfo.reserved,
1288 zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
1289 zinfo.CRC, compress_size, file_size,
1290 len(zinfo.filename), len(extra_data), len(zinfo.comment),
1291 0, zinfo.internal_attr, zinfo.external_attr,
1292 header_offset), file=sys.stderr)
1293 raise
1294 self.fp.write(centdir)
1295 self.fp.write(filename)
1296 self.fp.write(extra_data)
1297 self.fp.write(zinfo.comment)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001298
Antoine Pitrou17babc52012-11-17 23:50:08 +01001299 pos2 = self.fp.tell()
1300 # Write end-of-zip-archive record
1301 centDirCount = count
1302 centDirSize = pos2 - pos1
1303 centDirOffset = pos1
1304 if (centDirCount >= ZIP_FILECOUNT_LIMIT or
1305 centDirOffset > ZIP64_LIMIT or
1306 centDirSize > ZIP64_LIMIT):
1307 # Need to write the ZIP64 end-of-archive records
1308 zip64endrec = struct.pack(
1309 structEndArchive64, stringEndArchive64,
1310 44, 45, 45, 0, 0, centDirCount, centDirCount,
1311 centDirSize, centDirOffset)
1312 self.fp.write(zip64endrec)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001313
Antoine Pitrou17babc52012-11-17 23:50:08 +01001314 zip64locrec = struct.pack(
1315 structEndArchive64Locator,
1316 stringEndArchive64Locator, 0, pos2, 1)
1317 self.fp.write(zip64locrec)
1318 centDirCount = min(centDirCount, 0xFFFF)
1319 centDirSize = min(centDirSize, 0xFFFFFFFF)
1320 centDirOffset = min(centDirOffset, 0xFFFFFFFF)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001321
Antoine Pitrou17babc52012-11-17 23:50:08 +01001322 endrec = struct.pack(structEndArchive, stringEndArchive,
1323 0, 0, centDirCount, centDirCount,
1324 centDirSize, centDirOffset, len(self._comment))
1325 self.fp.write(endrec)
1326 self.fp.write(self._comment)
1327 self.fp.flush()
1328 finally:
1329 fp = self.fp
1330 self.fp = None
1331 if not self._filePassed:
1332 fp.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001333
1334
class PyZipFile(ZipFile):
    """Class to create ZIP archives with Python library files and packages."""

    def __init__(self, file, mode="r", compression=ZIP_STORED,
                 allowZip64=False, optimize=-1):
        """Open the archive and remember the bytecode optimization level.

        file, mode, compression and allowZip64 are forwarded unchanged to
        ZipFile.__init__.  optimize selects how modules are compiled by
        writepy(): -1 is the legacy mode (reuse whichever compiled file is
        up to date), any other value is passed to py_compile.compile().
        """
        ZipFile.__init__(self, file, mode=mode, compression=compression,
                         allowZip64=allowZip64)
        self._optimize = optimize

    def writepy(self, pathname, basename=""):
        """Add all files from "pathname" to the ZIP archive.

        If pathname is a package directory, search the directory and
        all package subdirectories recursively for all *.py and enter
        the modules into the archive.  If pathname is a plain
        directory, listdir *.py and enter all modules.  Else, pathname
        must be a Python *.py file and the module will be put into the
        archive.  Added modules are always module.pyo or module.pyc,
        unless compilation fails, in which case the module.py source is
        added instead.  This method will compile the module.py into
        module.pyc if necessary.

        basename is the archive directory prefix the entries are stored
        under.  Raises RuntimeError if pathname is a file that does not
        end with ".py".
        """
        name = os.path.split(pathname)[1]
        if not os.path.isdir(pathname):
            # A single module file.
            if pathname[-3:] != ".py":
                raise RuntimeError(
                    'Files added with writepy() must end with ".py"')
            self._write_module(pathname[0:-3], basename, "Adding file")
            return

        initname = os.path.join(pathname, "__init__.py")
        if os.path.isfile(initname):
            # This is a package directory, add it
            if basename:
                basename = "%s/%s" % (basename, name)
            else:
                basename = name
            if self.debug:
                print("Adding package in", pathname, "as", basename)
            self._write_module(initname[0:-3], basename)
            dirlist = os.listdir(pathname)
            # __init__.py has just been written; do not add it twice.
            dirlist.remove("__init__.py")
            # Add all *.py files and package subdirectories
            for filename in dirlist:
                path = os.path.join(pathname, filename)
                ext = os.path.splitext(filename)[1]
                if os.path.isdir(path):
                    if os.path.isfile(os.path.join(path, "__init__.py")):
                        # This is a package directory, add it
                        self.writepy(path, basename)  # Recursive call
                elif ext == ".py":
                    self._write_module(path[0:-3], basename)
        else:
            # This is NOT a package directory, add its files at top level
            if self.debug:
                print("Adding files from directory", pathname)
            for filename in os.listdir(pathname):
                path = os.path.join(pathname, filename)
                ext = os.path.splitext(filename)[1]
                if ext == ".py":
                    self._write_module(path[0:-3], basename)

    def _write_module(self, module_path, basename, verb="Adding"):
        """Resolve one module (path without ".py") and write it to the archive.

        verb is the leading word of the debug message printed when
        self.debug is set.
        """
        fname, arcname = self._get_codename(module_path, basename)
        if self.debug:
            print(verb, arcname)
        self.write(fname, arcname)

    def _get_codename(self, pathname, basename):
        """Return (filename, archivename) for the path.

        Given a module name path, return the correct file path and
        archive name, compiling if necessary.  For example, given
        /python/lib/string, return (/python/lib/string.pyc, string.pyc);
        the archive name is prefixed with "basename/" when basename is
        non-empty.  If the module cannot be compiled, the .py source
        file itself is returned for both the file and the archive name.
        """
        def _compile(file, optimize=-1):
            """Byte-compile file; return True on success, False on error."""
            import py_compile
            if self.debug:
                print("Compiling", file)
            try:
                py_compile.compile(file, doraise=True, optimize=optimize)
            except py_compile.PyCompileError as err:
                # Report the problem and let the caller fall back to
                # archiving the source file.
                print(err.msg)
                return False
            return True

        file_py = pathname + ".py"
        file_pyc = pathname + ".pyc"
        file_pyo = pathname + ".pyo"
        pycache_pyc = imp.cache_from_source(file_py, True)
        pycache_pyo = imp.cache_from_source(file_py, False)

        def _up_to_date(compiled):
            """Return True if compiled exists and is not older than file_py."""
            return (os.path.isfile(compiled) and
                    os.stat(compiled).st_mtime >= os.stat(file_py).st_mtime)

        if self._optimize == -1:
            # legacy mode: use whatever file is present
            if _up_to_date(file_pyo):
                # Use .pyo file.
                arcname = fname = file_pyo
            elif _up_to_date(file_pyc):
                # Use .pyc file.
                arcname = fname = file_pyc
            elif _up_to_date(pycache_pyc):
                # Use the __pycache__/*.pyc file, but write it to the legacy
                # pyc file name in the archive.
                fname = pycache_pyc
                arcname = file_pyc
            elif _up_to_date(pycache_pyo):
                # Use the __pycache__/*.pyo file, but write it to the legacy
                # pyo file name in the archive.
                fname = pycache_pyo
                arcname = file_pyo
            else:
                # Compile py into PEP 3147 pyc file.
                if _compile(file_py):
                    fname = (pycache_pyc if __debug__ else pycache_pyo)
                    arcname = (file_pyc if __debug__ else file_pyo)
                else:
                    fname = arcname = file_py
        else:
            # new mode: use given optimization level
            if self._optimize == 0:
                fname = pycache_pyc
                arcname = file_pyc
            else:
                fname = pycache_pyo
                arcname = file_pyo
            if not _up_to_date(fname):
                if not _compile(file_py, optimize=self._optimize):
                    fname = arcname = file_py
        archivename = os.path.split(arcname)[1]
        if basename:
            archivename = "%s/%s" % (basename, archivename)
        return (fname, archivename)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001477
1478
def main(args=None):
    """Run the command-line interface.

    args is the argument list (defaults to sys.argv[1:]).  The first
    argument selects the action:

        -l zipfile.zip            list the archive contents
        -t zipfile.zip            test the archive for corrupted members
        -e zipfile.zip target     extract the archive into directory target
        -c zipfile.zip src ...    create the archive from files/directories

    On an unknown action or a wrong number of arguments the usage text is
    printed and sys.exit(1) is called.
    """
    import textwrap
    USAGE = textwrap.dedent("""\
        Usage:
            zipfile.py -l zipfile.zip        # Show listing of a zipfile
            zipfile.py -t zipfile.zip        # Test if a zipfile is valid
            zipfile.py -e zipfile.zip target # Extract zipfile into target dir
            zipfile.py -c zipfile.zip src ... # Create zipfile from sources
        """)
    if args is None:
        args = sys.argv[1:]

    if not args or args[0] not in ('-l', '-c', '-e', '-t'):
        print(USAGE)
        sys.exit(1)

    if args[0] == '-l':
        if len(args) != 2:
            print(USAGE)
            sys.exit(1)
        with ZipFile(args[1], 'r') as zf:
            zf.printdir()

    elif args[0] == '-t':
        if len(args) != 2:
            print(USAGE)
            sys.exit(1)
        with ZipFile(args[1], 'r') as zf:
            badfile = zf.testzip()
        if badfile:
            print("The following enclosed file is corrupted: {!r}".format(badfile))
        print("Done testing")

    elif args[0] == '-e':
        if len(args) != 3:
            print(USAGE)
            sys.exit(1)

        # Delegate to extractall() rather than writing members by hand:
        # the manual loop tried to open() directory entries (names ending
        # in "/") as files and joined member names onto the target
        # without any normalisation.
        with ZipFile(args[1], 'r') as zf:
            zf.extractall(args[2])

    elif args[0] == '-c':
        if len(args) < 3:
            print(USAGE)
            sys.exit(1)

        def addToZip(zf, path, zippath):
            """Recursively add path to zf under the archive name zippath."""
            if os.path.isfile(path):
                zf.write(path, zippath, ZIP_DEFLATED)
            elif os.path.isdir(path):
                for nm in os.listdir(path):
                    addToZip(zf,
                             os.path.join(path, nm), os.path.join(zippath, nm))
            # else: ignore

        with ZipFile(args[1], 'w', allowZip64=True) as zf:
            for src in args[2:]:
                addToZip(zf, src, os.path.basename(src))
# Run the command-line interface only when executed as a script.
if __name__ == "__main__":
    main()