blob: dcebf7211ef85ca93bf2cd0d156f0f7907b05c9f [file] [log] [blame]
"""
Read and write ZIP files.

XXX references to utf-8 need further investigation.
"""
Antoine Pitroua32f9a22010-01-27 21:18:57 +00006import io
Barry Warsaw28a691b2010-04-17 00:19:56 +00007import os
Antoine Pitroua32f9a22010-01-27 21:18:57 +00008import re
Barry Warsaw28a691b2010-04-17 00:19:56 +00009import imp
10import sys
11import time
12import stat
13import shutil
14import struct
15import binascii
16
Guido van Rossum32abe6f2000-03-31 17:30:02 +000017
# Optional compression back-ends.  Each module name is bound to None when
# the module cannot be imported, so callers can test it for truthiness.
try:
    import zlib # We may need its compression method
except ImportError:
    zlib = None
    # Without zlib, take the CRC-32 routine from binascii instead.
    crc32 = binascii.crc32
else:
    crc32 = zlib.crc32

try:
    import bz2 # We may need its compression method
except ImportError:
    bz2 = None

try:
    import lzma # We may need its compression method
except ImportError:
    lzma = None
34
# Names exported by "from zipfile import *".
__all__ = [
    "BadZipFile", "BadZipfile", "error",
    "ZIP_STORED", "ZIP_DEFLATED", "ZIP_BZIP2", "ZIP_LZMA",
    "is_zipfile", "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile",
]
Skip Montanaro40fc1602001-03-01 04:27:19 +000038
class BadZipFile(Exception):
    """Raised by this module when a ZIP file is found to be invalid."""
Thomas Wouters0e3f5912006-08-11 14:57:12 +000041
42
class LargeZipFile(Exception):
    """
    Raised when writing a zipfile that requires the ZIP64 extensions
    while those extensions are disabled.
    """
48
# Pre-3.2 compatibility names
BadZipfile = BadZipFile
error = BadZipFile
50
Guido van Rossum32abe6f2000-03-31 17:30:02 +000051
# Size/count thresholds used by this module.
ZIP64_LIMIT = 2**31 - 1
ZIP_FILECOUNT_LIMIT = 2**16
ZIP_MAX_COMMENT = 2**16 - 1

# constants for Zip file compression methods
ZIP_STORED = 0
ZIP_DEFLATED = 8
ZIP_BZIP2 = 12
ZIP_LZMA = 14
# Other ZIP compression methods not supported

# "Version needed to extract" values written into headers.
DEFAULT_VERSION = 20
ZIP64_VERSION = 45
BZIP2_VERSION = 46
LZMA_VERSION = 63
# we recognize (but not necessarily support) all features up to that version
MAX_EXTRACT_VERSION = 63
Martin v. Löwisf6b16a42012-05-01 07:58:44 +020069
# Below are some formats and associated data for reading/writing headers using
# the struct module.  The names and structures of headers/records are those used
# in the PKWARE description of the ZIP file format:
#     http://www.pkware.com/documents/casestudies/APPNOTE.TXT
# (URL valid as of January 2008)

# The "end of central directory" structure, magic number, size, and indices
# (section V.I in the format document)
structEndArchive = b"<4s4H2LH"
stringEndArchive = b"PK\005\006"
sizeEndCentDir = struct.calcsize(structEndArchive)

# Positions of the fields in an unpacked "end of central directory" record.
# The last two (_ECD_COMMENT, _ECD_LOCATION) are not part of the structure as
# defined in the spec, but they are used internally by this module as a
# convenience.
(_ECD_SIGNATURE,
 _ECD_DISK_NUMBER,
 _ECD_DISK_START,
 _ECD_ENTRIES_THIS_DISK,
 _ECD_ENTRIES_TOTAL,
 _ECD_SIZE,
 _ECD_OFFSET,
 _ECD_COMMENT_SIZE,
 _ECD_COMMENT,
 _ECD_LOCATION) = range(10)
94
# The "central directory" structure, magic number, size, and indices
# of entries in the structure (section V.F in the format document)
structCentralDir = "<4s4B4HL2L5H2L"
stringCentralDir = b"PK\001\002"
sizeCentralDir = struct.calcsize(structCentralDir)

# indexes of entries in the central directory structure
(_CD_SIGNATURE,
 _CD_CREATE_VERSION,
 _CD_CREATE_SYSTEM,
 _CD_EXTRACT_VERSION,
 _CD_EXTRACT_SYSTEM,
 _CD_FLAG_BITS,
 _CD_COMPRESS_TYPE,
 _CD_TIME,
 _CD_DATE,
 _CD_CRC,
 _CD_COMPRESSED_SIZE,
 _CD_UNCOMPRESSED_SIZE,
 _CD_FILENAME_LENGTH,
 _CD_EXTRA_FIELD_LENGTH,
 _CD_COMMENT_LENGTH,
 _CD_DISK_NUMBER_START,
 _CD_INTERNAL_FILE_ATTRIBUTES,
 _CD_EXTERNAL_FILE_ATTRIBUTES,
 _CD_LOCAL_HEADER_OFFSET) = range(19)
121
# The "local file header" structure, magic number, size, and indices
# (section V.A in the format document)
structFileHeader = "<4s2B4HL2L2H"
stringFileHeader = b"PK\003\004"
sizeFileHeader = struct.calcsize(structFileHeader)

# Positions of the fields in an unpacked local file header.
(_FH_SIGNATURE,
 _FH_EXTRACT_VERSION,
 _FH_EXTRACT_SYSTEM,
 _FH_GENERAL_PURPOSE_FLAG_BITS,
 _FH_COMPRESSION_METHOD,
 _FH_LAST_MOD_TIME,
 _FH_LAST_MOD_DATE,
 _FH_CRC,
 _FH_COMPRESSED_SIZE,
 _FH_UNCOMPRESSED_SIZE,
 _FH_FILENAME_LENGTH,
 _FH_EXTRA_FIELD_LENGTH) = range(12)
140
# The "Zip64 end of central directory locator" structure, magic number, and size
structEndArchive64Locator = "<4sLQL"
stringEndArchive64Locator = b"PK\x06\x07"
sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator)

# The "Zip64 end of central directory" record, magic number, size, and indices
# (section V.G in the format document)
structEndArchive64 = "<4sQ2H2L4Q"
stringEndArchive64 = b"PK\x06\x06"
sizeEndCentDir64 = struct.calcsize(structEndArchive64)

# Positions of the fields in an unpacked "Zip64 end of central directory"
# record.
(_CD64_SIGNATURE,
 _CD64_DIRECTORY_RECSIZE,
 _CD64_CREATE_VERSION,
 _CD64_EXTRACT_VERSION,
 _CD64_DISK_NUMBER,
 _CD64_DISK_NUMBER_START,
 _CD64_NUMBER_ENTRIES_THIS_DISK,
 _CD64_NUMBER_ENTRIES_TOTAL,
 _CD64_DIRECTORY_SIZE,
 _CD64_OFFSET_START_CENTDIR) = range(10)
162
def _check_zipfile(fp):
    """Return True if an end-of-central-directory record is found in fp.

    An IOError raised while looking for the record is treated as
    "not a ZIP file" and yields False.
    """
    try:
        end_record = _EndRecData(fp)
    except IOError:
        return False
    # file has correct magic number when a record was found
    return True if end_record else False
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000170
def is_zipfile(filename):
    """Quickly see if a file is a ZIP file by checking the magic number.

    The filename argument may be a file or file-like object too (anything
    with a "read" attribute is used directly instead of being opened).
    An IOError while opening or checking the file gives False.
    """
    is_zip = False
    try:
        if not hasattr(filename, "read"):
            with open(filename, "rb") as stream:
                is_zip = _check_zipfile(stream)
        else:
            is_zip = _check_zipfile(fp=filename)
    except IOError:
        pass
    return is_zip
186
def _EndRecData64(fpin, offset, endrec):
    """
    Read the ZIP64 end-of-archive records and use that to update endrec

    fpin is a seekable binary file object, offset is the (negative) position
    of the regular "end of central directory" record relative to the end of
    the file, and endrec is the list built from that record.

    Returns endrec, updated in place when a ZIP64 record is found and left
    untouched when the file holds no (complete) ZIP64 locator/record.
    Raises BadZipFile for archives that span multiple disks.
    """
    try:
        fpin.seek(offset - sizeEndCentDir64Locator, 2)
    except IOError:
        # If the seek fails, the file is not large enough to contain a ZIP64
        # end-of-archive record, so just return the end record we were given.
        return endrec

    data = fpin.read(sizeEndCentDir64Locator)
    if len(data) != sizeEndCentDir64Locator:
        # Some streams (e.g. io.BytesIO) clamp an out-of-range seek instead
        # of failing; a short read means there is no room for a locator.
        return endrec
    sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, data)
    if sig != stringEndArchive64Locator:
        return endrec

    if diskno != 0 or disks != 1:
        raise BadZipFile("zipfiles that span multiple disks are not supported")

    # Assume no 'zip64 extensible data'
    fpin.seek(offset - sizeEndCentDir64Locator - sizeEndCentDir64, 2)
    data = fpin.read(sizeEndCentDir64)
    if len(data) != sizeEndCentDir64:
        # Truncated file: unpacking would raise struct.error.
        return endrec
    sig, sz, create_version, read_version, disk_num, disk_dir, \
            dircount, dircount2, dirsize, diroffset = \
            struct.unpack(structEndArchive64, data)
    if sig != stringEndArchive64:
        return endrec

    # Update the original endrec using data from the ZIP64 record
    endrec[_ECD_SIGNATURE] = sig
    endrec[_ECD_DISK_NUMBER] = disk_num
    endrec[_ECD_DISK_START] = disk_dir
    endrec[_ECD_ENTRIES_THIS_DISK] = dircount
    endrec[_ECD_ENTRIES_TOTAL] = dircount2
    endrec[_ECD_SIZE] = dirsize
    endrec[_ECD_OFFSET] = diroffset
    return endrec
224
225
def _EndRecData(fpin):
    """Return data from the "End of Central Directory" record, or None.

    The data is a list of the nine items in the ZIP "End of central dir"
    record followed by a tenth item, the file seek offset of this record.

    fpin must be a seekable binary file object.  None is returned when no
    complete record can be located."""

    # Determine file size
    fpin.seek(0, 2)
    filesize = fpin.tell()

    # Check to see if this is ZIP file with no archive comment (the
    # "end of central directory" structure should be the last item in the
    # file if this is the case).
    try:
        fpin.seek(-sizeEndCentDir, 2)
    except IOError:
        return None
    data = fpin.read()
    # The length check guards against streams that clamp the seek above on
    # files shorter than the record; unpacking them would raise struct.error.
    if (len(data) == sizeEndCentDir and
            data[0:4] == stringEndArchive and
            data[-2:] == b"\000\000"):
        # the signature is correct and there's no comment, unpack structure
        endrec = list(struct.unpack(structEndArchive, data))

        # Append a blank comment and record start offset
        endrec.append(b"")
        endrec.append(filesize - sizeEndCentDir)

        # Try to read the "Zip64 end of central directory" structure
        return _EndRecData64(fpin, -sizeEndCentDir, endrec)

    # Either this is not a ZIP file, or it is a ZIP file with an archive
    # comment.  Search the end of the file for the "end of central directory"
    # record signature. The comment is the last item in the ZIP file and may be
    # up to 64K long.  It is assumed that the "end of central directory" magic
    # number does not appear in the comment.
    maxCommentStart = max(filesize - (1 << 16) - sizeEndCentDir, 0)
    fpin.seek(maxCommentStart, 0)
    data = fpin.read()
    start = data.rfind(stringEndArchive)
    if start >= 0:
        # found the magic number; attempt to unpack and interpret
        recData = data[start:start+sizeEndCentDir]
        if len(recData) != sizeEndCentDir:
            # The signature sits too close to the end of the file for a
            # complete record to follow it.
            return None
        endrec = list(struct.unpack(structEndArchive, recData))
        commentSize = endrec[_ECD_COMMENT_SIZE] #as claimed by the zip file
        comment = data[start+sizeEndCentDir:start+sizeEndCentDir+commentSize]
        endrec.append(comment)
        endrec.append(maxCommentStart + start)

        # Try to read the "Zip64 end of central directory" structure
        return _EndRecData64(fpin, maxCommentStart + start - filesize,
                             endrec)

    # Unable to find a valid end of central directory structure
    return None
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000280
Fred Drake484d7352000-10-02 21:14:52 +0000281
class ZipInfo (object):
    """Class with attributes describing each file in the ZIP archive."""

    __slots__ = (
        'orig_filename',
        'filename',
        'date_time',
        'compress_type',
        'comment',
        'extra',
        'create_system',
        'create_version',
        'extract_version',
        'reserved',
        'flag_bits',
        'volume',
        'internal_attr',
        'external_attr',
        'header_offset',
        'CRC',
        'compress_size',
        'file_size',
        '_raw_time',
    )

    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
        """Describe one archive member.

        filename is the member name and date_time a
        (year, month, day, hour, min, sec) tuple.

        Raises ValueError when the year is earlier than 1980.
        """
        self.orig_filename = filename   # Original file name in archive

        # Terminate the file name at the first null byte.  Null bytes in file
        # names are used as tricks by viruses in archives.
        null_byte = filename.find(chr(0))
        if null_byte >= 0:
            filename = filename[0:null_byte]
        # This is used to ensure paths in generated ZIP files always use
        # forward slashes as the directory separator, as required by the
        # ZIP format specification.
        if os.sep != "/" and os.sep in filename:
            filename = filename.replace(os.sep, "/")

        self.filename = filename        # Normalized file name
        self.date_time = date_time      # year, month, day, hour, min, sec

        if date_time[0] < 1980:
            raise ValueError('ZIP does not support timestamps before 1980')

        # Standard values:
        self.compress_type = ZIP_STORED # Type of compression for the file
        self.comment = b""              # Comment for each file
        self.extra = b""                # ZIP extra data
        if sys.platform == 'win32':
            self.create_system = 0          # System which created ZIP archive
        else:
            # Assume everything else is unix-y
            self.create_system = 3          # System which created ZIP archive
        self.create_version = DEFAULT_VERSION  # Version which created ZIP archive
        self.extract_version = DEFAULT_VERSION # Version needed to extract archive
        self.reserved = 0               # Must be zero
        self.flag_bits = 0              # ZIP flag bits
        self.volume = 0                 # Volume number of file header
        self.internal_attr = 0          # Internal attributes
        self.external_attr = 0          # External file attributes
        # Other attributes are set by class ZipFile:
        # header_offset         Byte offset to the file header
        # CRC                   CRC-32 of the uncompressed file
        # compress_size         Size of the compressed file
        # file_size             Size of the uncompressed file

    def FileHeader(self, zip64=None):
        """Return the per-file header as a bytes object.

        zip64 selects whether a ZIP64 extra record is appended: True always
        appends it, False never does, and None (the default) appends it only
        when a size exceeds ZIP64_LIMIT.

        As a side effect, extract_version and create_version are raised to
        the minimum version required by the sizes and compression method.

        Raises LargeZipFile when a size exceeds ZIP64_LIMIT and zip64 is
        false.
        """
        dt = self.date_time
        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
        if self.flag_bits & 0x08:
            # Set these to zero because we write them after the file data
            CRC = compress_size = file_size = 0
        else:
            CRC = self.CRC
            compress_size = self.compress_size
            file_size = self.file_size

        extra = self.extra

        min_version = 0
        if zip64 is None:
            zip64 = file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT
        if zip64:
            fmt = '<HHQQ'
            extra = extra + struct.pack(fmt,
                    1, struct.calcsize(fmt)-4, file_size, compress_size)
        if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
            if not zip64:
                raise LargeZipFile("Filesize would require ZIP64 extensions")
            # File is larger than what fits into a 4 byte integer,
            # fall back to the ZIP64 extension
            file_size = 0xffffffff
            compress_size = 0xffffffff
            min_version = ZIP64_VERSION

        if self.compress_type == ZIP_BZIP2:
            min_version = max(BZIP2_VERSION, min_version)
        elif self.compress_type == ZIP_LZMA:
            min_version = max(LZMA_VERSION, min_version)

        self.extract_version = max(min_version, self.extract_version)
        self.create_version = max(min_version, self.create_version)
        filename, flag_bits = self._encodeFilenameFlags()
        header = struct.pack(structFileHeader, stringFileHeader,
                 self.extract_version, self.reserved, flag_bits,
                 self.compress_type, dostime, dosdate, CRC,
                 compress_size, file_size,
                 len(filename), len(extra))
        return header + filename + extra

    def _encodeFilenameFlags(self):
        """Return (encoded filename, flag bits) for writing a header.

        The name is encoded as ASCII when possible; otherwise it is encoded
        as UTF-8 and bit 0x800 is added to the returned flag bits.
        """
        try:
            return self.filename.encode('ascii'), self.flag_bits
        except UnicodeEncodeError:
            return self.filename.encode('utf-8'), self.flag_bits | 0x800

    def _decodeExtra(self):
        """Apply the ZIP64 record (type 1) found in self.extra, if any.

        file_size, compress_size and header_offset are replaced, in that
        order, by the 64-bit values of the record whenever they currently
        hold the 0xffffffff placeholder.

        Raises RuntimeError when a ZIP64 record has an unexpected length.
        """
        # Try to decode the extra field.
        extra = self.extra
        unpack = struct.unpack
        # Every record starts with a 4-byte (type, length) header; stop when
        # fewer bytes remain instead of failing to unpack trailing padding.
        while len(extra) >= 4:
            tp, ln = unpack('<HH', extra[:4])
            if tp == 1:
                if ln >= 24:
                    counts = unpack('<QQQ', extra[4:28])
                elif ln == 16:
                    counts = unpack('<QQ', extra[4:20])
                elif ln == 8:
                    counts = unpack('<Q', extra[4:12])
                elif ln == 0:
                    counts = ()
                else:
                    raise RuntimeError("Corrupt extra field %s"%(ln,))

                idx = 0

                # ZIP64 extension (large files and/or large archives)
                if self.file_size in (0xffffffffffffffff, 0xffffffff):
                    self.file_size = counts[idx]
                    idx += 1

                if self.compress_size == 0xFFFFFFFF:
                    self.compress_size = counts[idx]
                    idx += 1

                if self.header_offset == 0xffffffff:
                    self.header_offset = counts[idx]
                    idx+=1

            extra = extra[ln+4:]
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000436
437
class _ZipDecrypter:
    """Class to handle decryption of files stored within a ZIP archive.

    ZIP supports a password-based form of encryption. Even though known
    plaintext attacks have been found against it, it is still useful
    to be able to get data out of such a file.

    Usage:
        zd = _ZipDecrypter(mypwd)
        plain_char = zd(cypher_char)
        plain_text = map(zd, cypher_text)
    """

    def _GenerateCRCTable():
        """Generate a CRC-32 table.

        ZIP encryption uses the CRC32 one-byte primitive for scrambling some
        internal keys. We noticed that a direct implementation is faster than
        relying on binascii.crc32().
        """
        polynomial = 0xedb88320
        entries = []
        for byte in range(256):
            value = byte
            for _ in range(8):
                shifted = (value >> 1) & 0x7FFFFFFF
                value = shifted ^ polynomial if value & 1 else shifted
            entries.append(value)
        return entries
    # Built once, while the class body executes.
    crctable = _GenerateCRCTable()

    def _crc32(self, ch, crc):
        """Compute the CRC32 primitive on one byte."""
        index = (crc ^ ch) & 0xff
        return ((crc >> 8) & 0xffffff) ^ self.crctable[index]

    def __init__(self, pwd):
        """Set the three keys to their start values and mix in each item
        of pwd."""
        self.key0 = 0x12345678
        self.key1 = 0x23456789
        self.key2 = 0x34567890
        for item in pwd:
            self._UpdateKeys(item)

    def _UpdateKeys(self, c):
        """Advance the three keys with the plain byte c."""
        self.key0 = self._crc32(c, self.key0)
        key1 = (self.key1 + (self.key0 & 0xff)) & 0xffffffff
        self.key1 = (key1 * 0x08088405 + 1) & 0xffffffff
        self.key2 = self._crc32((self.key1 >> 24) & 0xff, self.key2)

    def __call__(self, c):
        """Decrypt a single character."""
        assert isinstance(c, int)
        k = self.key2 | 2
        keystream_byte = ((k * (k ^ 1)) >> 8) & 0xff
        plain = c ^ keystream_byte
        self._UpdateKeys(plain)
        return plain
495
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200496
class LZMACompressor:
    """Raw LZMA1 compressor whose first output is prefixed by a small
    header: the bytes 9 and 4, the little-endian 16-bit length of the
    encoded filter properties, then the properties themselves."""

    def __init__(self):
        # The underlying compressor is created on first use.
        self._comp = None

    def _init(self):
        """Create the underlying compressor and return the header bytes."""
        props = lzma._encode_filter_properties({'id': lzma.FILTER_LZMA1})
        filter_spec = lzma._decode_filter_properties(lzma.FILTER_LZMA1, props)
        self._comp = lzma.LZMACompressor(lzma.FORMAT_RAW,
                                         filters=[filter_spec])
        return struct.pack('<BBH', 9, 4, len(props)) + props

    def compress(self, data):
        """Compress data; the first call also emits the header."""
        header = self._init() if self._comp is None else b''
        return header + self._comp.compress(data)

    def flush(self):
        """Finish the stream; emits the header if nothing was written yet."""
        header = self._init() if self._comp is None else b''
        return header + self._comp.flush()
518
519
class LZMADecompressor:
    """Decompressor for raw LZMA1 data preceded by a 4-byte header and the
    encoded filter properties (the header's last two bytes hold the
    little-endian length of those properties)."""

    def __init__(self):
        self._decomp = None
        self._unconsumed = b''
        self.eof = False

    def decompress(self, data):
        """Return decompressed bytes for data.

        b'' is returned while the header, the properties and at least one
        further byte have not all been received yet; the input is buffered
        until then.
        """
        if self._decomp is None:
            self._unconsumed += data
            pending = self._unconsumed
            if len(pending) <= 4:
                return b''
            psize, = struct.unpack('<H', pending[2:4])
            body_start = 4 + psize
            if len(pending) <= body_start:
                return b''

            filter_spec = lzma._decode_filter_properties(
                lzma.FILTER_LZMA1, pending[4:body_start])
            self._decomp = lzma.LZMADecompressor(lzma.FORMAT_RAW,
                                                 filters=[filter_spec])
            data = pending[body_start:]
            # The buffer is no longer needed once the stream has started.
            del self._unconsumed

        result = self._decomp.decompress(data)
        self.eof = self._decomp.eof
        return result
546
547
# Names of ZIP compression methods, keyed by method number; used when
# reporting a compression type that cannot be decompressed.
compressor_names = {
    0: 'store', 1: 'shrink',
    2: 'reduce', 3: 'reduce', 4: 'reduce', 5: 'reduce',
    6: 'implode', 7: 'tokenize',
    8: 'deflate', 9: 'deflate64',
    10: 'implode',
    12: 'bzip2',
    14: 'lzma',
    18: 'terse', 19: 'lz77',
    97: 'wavpack', 98: 'ppmd',
}
567
def _check_compression(compression):
    """Raise RuntimeError unless the compression method can be used.

    ZIP_STORED is always accepted; the other known methods need their
    module (zlib, bz2 or lzma) to have been imported successfully.
    """
    if compression == ZIP_STORED:
        return
    if compression == ZIP_DEFLATED:
        if not zlib:
            raise RuntimeError(
                "Compression requires the (missing) zlib module")
        return
    if compression == ZIP_BZIP2:
        if not bz2:
            raise RuntimeError(
                "Compression requires the (missing) bz2 module")
        return
    if compression == ZIP_LZMA:
        if not lzma:
            raise RuntimeError(
                "Compression requires the (missing) lzma module")
        return
    raise RuntimeError("That compression method is not supported")
585
586
def _get_compressor(compress_type):
    """Return a new compressor object for compress_type, or None when the
    type is not one of ZIP_DEFLATED, ZIP_BZIP2 or ZIP_LZMA."""
    if compress_type == ZIP_DEFLATED:
        # A negative window size is passed, as in the matching decompressor.
        return zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                                zlib.DEFLATED, -15)
    if compress_type == ZIP_BZIP2:
        return bz2.BZ2Compressor()
    if compress_type == ZIP_LZMA:
        return LZMACompressor()
    return None
597
598
def _get_decompressor(compress_type):
    """Return a new decompressor object for compress_type.

    None is returned for ZIP_STORED.  Any type other than ZIP_STORED,
    ZIP_DEFLATED, ZIP_BZIP2 and ZIP_LZMA raises NotImplementedError, with
    the method name in the message when it is listed in compressor_names.
    """
    if compress_type == ZIP_STORED:
        return None
    if compress_type == ZIP_DEFLATED:
        return zlib.decompressobj(-15)
    if compress_type == ZIP_BZIP2:
        return bz2.BZ2Decompressor()
    if compress_type == ZIP_LZMA:
        return LZMADecompressor()

    descr = compressor_names.get(compress_type)
    if descr:
        raise NotImplementedError("compression type %d (%s)" % (compress_type, descr))
    raise NotImplementedError("compression type %d" % (compress_type,))
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200614
615
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000616class ZipExtFile(io.BufferedIOBase):
Guido van Rossumd8faa362007-04-27 19:54:29 +0000617 """File-like object for reading an archive member.
618 Is returned by ZipFile.open().
619 """
620
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000621 # Max size supported by decompressor.
622 MAX_N = 1 << 31 - 1
Guido van Rossumd8faa362007-04-27 19:54:29 +0000623
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000624 # Read from compressed files in 4k blocks.
625 MIN_READ_SIZE = 4096
Guido van Rossumd8faa362007-04-27 19:54:29 +0000626
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000627 # Search for universal newlines or line chunks.
628 PATTERN = re.compile(br'^(?P<chunk>[^\r\n]+)|(?P<newline>\n|\r\n?)')
629
Łukasz Langae94980a2010-11-22 23:31:26 +0000630 def __init__(self, fileobj, mode, zipinfo, decrypter=None,
631 close_fileobj=False):
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000632 self._fileobj = fileobj
633 self._decrypter = decrypter
Łukasz Langae94980a2010-11-22 23:31:26 +0000634 self._close_fileobj = close_fileobj
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000635
Ezio Melotti92b47432010-01-28 01:44:41 +0000636 self._compress_type = zipinfo.compress_type
Ezio Melotti92b47432010-01-28 01:44:41 +0000637 self._compress_left = zipinfo.compress_size
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200638 self._left = zipinfo.file_size
Ezio Melotti92b47432010-01-28 01:44:41 +0000639
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200640 self._decompressor = _get_decompressor(self._compress_type)
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000641
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200642 self._eof = False
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000643 self._readbuffer = b''
644 self._offset = 0
645
646 self._universal = 'U' in mode
647 self.newlines = None
648
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000649 # Adjust read size for encrypted files since the first 12 bytes
650 # are for the encryption/password information.
651 if self._decrypter is not None:
652 self._compress_left -= 12
653
654 self.mode = mode
Guido van Rossumd8faa362007-04-27 19:54:29 +0000655 self.name = zipinfo.filename
656
Antoine Pitrou7c8bcb62010-08-12 15:11:50 +0000657 if hasattr(zipinfo, 'CRC'):
658 self._expected_crc = zipinfo.CRC
659 self._running_crc = crc32(b'') & 0xffffffff
660 else:
661 self._expected_crc = None
662
def readline(self, limit=-1):
    """Read and return a line from the stream.

    If limit is given and non-negative, at most limit bytes are returned.
    In universal-newline mode each line ending found is recorded in
    self.newlines and reported to the caller as a single newline byte.
    """
    if not self._universal:
        if limit < 0:
            # Fast path: a complete line is already sitting in the buffer.
            stop = self._readbuffer.find(b'\n', self._offset) + 1
            if stop > 0:
                start, self._offset = self._offset, stop
                return self._readbuffer[start:stop]
        return io.BufferedIOBase.readline(self, limit)

    collected = b''
    while limit < 0 or len(collected) < limit:
        ahead = self.peek(2)
        if ahead == b'':
            break

        # PATTERN yields either a run of ordinary bytes or one line ending,
        # never both.  Peeking two bytes guarantees that a '\r\n' pair is
        # seen whole instead of being split into two separate endings.
        found = self.PATTERN.search(ahead)
        ending = found.group('newline')
        if ending is None:
            piece = found.group('chunk')
            if limit >= 0:
                piece = piece[:limit - len(collected)]
            self._offset += len(piece)
            collected += piece
            continue

        if self.newlines is None:
            self.newlines = []
        if ending not in self.newlines:
            self.newlines.append(ending)
        self._offset += len(ending)
        return collected + b'\n'

    return collected
712
def peek(self, n=1):
    """Return buffered bytes without advancing the position."""
    buffered = len(self._readbuffer) - self._offset
    if n > buffered:
        # Not enough buffered: read ahead, then push the bytes back so the
        # logical position is unchanged.
        fetched = self.read(n)
        if len(fetched) > self._offset:
            self._readbuffer = fetched + self._readbuffer[self._offset:]
            self._offset = 0
        else:
            self._offset -= len(fetched)

    # Hand back at most 512 bytes to keep allocations small in tight loops.
    start = self._offset
    return self._readbuffer[start:start + 512]
725
def readable(self):
    """Report that this stream can be read from."""
    return True
728
def read(self, n=-1):
    """Read and return up to n bytes.

    If n is omitted, None, or negative, data is read and returned until
    EOF is reached.
    """
    if n is None or n < 0:
        # Drain whatever is buffered, then pull until the member ends.
        result = self._readbuffer[self._offset:]
        self._readbuffer = b''
        self._offset = 0
        while not self._eof:
            result += self._read1(self.MAX_N)
        return result

    stop = n + self._offset
    if stop < len(self._readbuffer):
        # The request is satisfied entirely from the buffer.
        result = self._readbuffer[self._offset:stop]
        self._offset = stop
        return result

    missing = stop - len(self._readbuffer)
    result = self._readbuffer[self._offset:]
    self._readbuffer = b''
    self._offset = 0
    while missing > 0 and not self._eof:
        chunk = self._read1(missing)
        if missing < len(chunk):
            # Got more than asked for: keep the surplus buffered.
            self._readbuffer = chunk
            self._offset = missing
            result += chunk[:missing]
            break
        result += chunk
        missing -= len(chunk)
    return result
761
762 def _update_crc(self, newdata):
Antoine Pitrou7c8bcb62010-08-12 15:11:50 +0000763 # Update the CRC using the given data.
764 if self._expected_crc is None:
765 # No need to compute the CRC if we don't have a reference value
766 return
767 self._running_crc = crc32(newdata, self._running_crc) & 0xffffffff
768 # Check the CRC if we're at the end of the file
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200769 if self._eof and self._running_crc != self._expected_crc:
Georg Brandl4d540882010-10-28 06:42:33 +0000770 raise BadZipFile("Bad CRC-32 for file %r" % self.name)
Antoine Pitrou7c8bcb62010-08-12 15:11:50 +0000771
def read1(self, n):
    """Read and return up to n bytes, doing as little raw reading as possible.

    If n is None or negative, the buffered bytes plus the result of one
    productive low-level read are returned.

    A single low-level read may legitimately yield no output before the
    end of the member has been reached.  Returning b'' in that situation
    would be taken for end of stream by callers, so the low-level read is
    repeated until at least one byte is available or EOF is reached.
    """
    if n is None or n < 0:
        buf = self._readbuffer[self._offset:]
        self._readbuffer = b''
        self._offset = 0
        while not self._eof:
            buf += self._read1(self.MAX_N)
            if buf:
                break
        return buf

    end = n + self._offset
    if end < len(self._readbuffer):
        # Served entirely from the buffer.
        buf = self._readbuffer[self._offset:end]
        self._offset = end
        return buf

    n = end - len(self._readbuffer)
    buf = self._readbuffer[self._offset:]
    self._readbuffer = b''
    self._offset = 0
    if n > 0:
        while not self._eof:
            data = self._read1(n)
            if n < len(data):
                # More than requested came back: keep the surplus buffered.
                self._readbuffer = data
                self._offset = n
                buf += data[:n]
                break
            buf += data
            if buf:
                break
    return buf
801
802 def _read1(self, n):
803 # Read up to n compressed bytes with at most one read() system call,
804 # decrypt and decompress them.
805 if self._eof or n <= 0:
806 return b''
Guido van Rossumd8faa362007-04-27 19:54:29 +0000807
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000808 # Read from file.
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200809 if self._compress_type == ZIP_DEFLATED:
810 ## Handle unconsumed data.
811 data = self._decompressor.unconsumed_tail
812 if n > len(data):
813 data += self._read2(n - len(data))
814 else:
815 data = self._read2(n)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000816
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200817 if self._compress_type == ZIP_STORED:
818 self._eof = self._compress_left <= 0
819 elif self._compress_type == ZIP_DEFLATED:
820 n = max(n, self.MIN_READ_SIZE)
821 data = self._decompressor.decompress(data, n)
822 self._eof = (self._decompressor.eof or
823 self._compress_left <= 0 and
824 not self._decompressor.unconsumed_tail)
825 if self._eof:
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000826 data += self._decompressor.flush()
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200827 else:
828 data = self._decompressor.decompress(data)
829 self._eof = self._decompressor.eof or self._compress_left <= 0
Guido van Rossumd8faa362007-04-27 19:54:29 +0000830
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200831 data = data[:self._left]
832 self._left -= len(data)
833 if self._left <= 0:
834 self._eof = True
835 self._update_crc(data)
836 return data
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000837
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200838 def _read2(self, n):
839 if self._compress_left <= 0:
840 return b''
841
842 n = max(n, self.MIN_READ_SIZE)
843 n = min(n, self._compress_left)
844
845 data = self._fileobj.read(n)
846 self._compress_left -= len(data)
847
848 if self._decrypter is not None:
849 data = bytes(map(self._decrypter, data))
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000850 return data
Guido van Rossumd8faa362007-04-27 19:54:29 +0000851
def close(self):
    """Close the stream, and the underlying file if this stream owns it."""
    try:
        owns_file = self._close_fileobj
        if owns_file:
            self._fileobj.close()
    finally:
        # Run the base-class close even if closing the file failed.
        super().close()
Guido van Rossumd8faa362007-04-27 19:54:29 +0000858
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000859
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000860class ZipFile:
Tim Petersa19a1682001-03-29 04:36:09 +0000861 """ Class with methods to open, read, write, close, list zip files.
862
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000863 z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=False)
Tim Petersa19a1682001-03-29 04:36:09 +0000864
Fred Drake3d9091e2001-03-26 15:49:24 +0000865 file: Either the path to the file, or a file-like object.
866 If it is a path, the file will be opened and closed by ZipFile.
867 mode: The mode can be either read "r", write "w" or append "a".
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +0200868 compression: ZIP_STORED (no compression), ZIP_DEFLATED (requires zlib),
869 ZIP_BZIP2 (requires bz2) or ZIP_LZMA (requires lzma).
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000870 allowZip64: if True ZipFile will create files with ZIP64 extensions when
871 needed, otherwise it will raise an exception when this would
872 be necessary.
873
Fred Drake3d9091e2001-03-26 15:49:24 +0000874 """
Fred Drake484d7352000-10-02 21:14:52 +0000875
Fred Drake90eac282001-02-28 05:29:34 +0000876 fp = None # Set here since __del__ checks it
877
def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=False):
    """Open the ZIP file with mode read "r", write "w" or append "a".

    file may be a path (opened and later closed by this object) or an
    already open file-like object (left open for the caller).
    """
    if mode not in ("r", "w", "a"):
        raise RuntimeError('ZipFile() requires mode "r", "w", or "a"')

    _check_compression(compression)

    self._allowZip64 = allowZip64
    self._didModify = False
    self.debug = 0                    # Level of printing: 0 through 3
    self.NameToInfo = {}              # Find file info given name
    self.filelist = []                # List of ZipInfo instances for archive
    self.compression = compression    # Method of compression
    self.mode = action = mode.replace('b', '')[0]
    self.pwd = None
    self._comment = b''

    if not isinstance(file, str):
        # A file-like object was supplied by the caller.
        self._filePassed = 1
        self.fp = file
        self.filename = getattr(file, 'name', None)
    else:
        # A path was supplied: open it ourselves.
        self._filePassed = 0
        self.filename = file
        open_modes = {'r': 'rb', 'w': 'wb', 'a': 'r+b'}
        try:
            self.fp = io.open(file, open_modes[mode])
        except IOError:
            if mode != 'a':
                raise
            # Appending to a file that cannot be opened for update:
            # fall back to creating it.
            mode = action = 'w'
            self.fp = io.open(file, open_modes[mode])

    try:
        if action == 'r':
            self._RealGetContents()
        elif action == 'w':
            # set the modified flag so central directory gets written
            # even if no files are added to the archive
            self._didModify = True
        elif action == 'a':
            try:
                # See if file is a zip file
                self._RealGetContents()
            except BadZipFile:
                # file is not a zip file, just append
                self.fp.seek(0, 2)
            else:
                # seek to start of directory and overwrite
                self.fp.seek(self.start_dir, 0)

            # set the modified flag so central directory gets written
            # even if no files are added to the archive
            self._didModify = True
        else:
            raise RuntimeError('Mode must be "r", "w" or "a"')
    except BaseException:
        # Do not leak a file we opened ourselves; then re-raise.
        opened = self.fp
        self.fp = None
        if not self._filePassed:
            opened.close()
        raise
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000942
Ezio Melottifaa6b7f2009-12-30 12:34:59 +0000943 def __enter__(self):
944 return self
945
946 def __exit__(self, type, value, traceback):
947 self.close()
948
def _RealGetContents(self):
    """Read in the table of contents for the ZIP file.

    Fills self.filelist and self.NameToInfo with one ZipInfo per central
    directory record, and sets self.start_dir and self._comment.

    Raises BadZipFile if the end-of-archive record cannot be found or a
    central directory record is malformed or truncated, and
    NotImplementedError if a member needs a newer ZIP version than is
    supported.
    """
    fp = self.fp
    try:
        endrec = _EndRecData(fp)
    except IOError:
        raise BadZipFile("File is not a zip file")
    if not endrec:
        raise BadZipFile("File is not a zip file")
    if self.debug > 1:
        print(endrec)
    size_cd = endrec[_ECD_SIZE]             # bytes in central directory
    offset_cd = endrec[_ECD_OFFSET]         # offset of central directory
    self._comment = endrec[_ECD_COMMENT]    # archive comment

    # "concat" is zero, unless zip was concatenated to another file
    concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
    if endrec[_ECD_SIGNATURE] == stringEndArchive64:
        # If Zip64 extension structures are present, account for them
        concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)

    if self.debug > 2:
        inferred = concat + offset_cd
        print("given, inferred, offset", offset_cd, inferred, concat)
    # self.start_dir:  Position of start of central directory
    self.start_dir = offset_cd + concat
    fp.seek(self.start_dir, 0)
    data = fp.read(size_cd)
    # Parse the directory from memory rather than from the file.
    fp = io.BytesIO(data)
    total = 0
    while total < size_cd:
        centdir = fp.read(sizeCentralDir)
        if centdir[0:4] != stringCentralDir:
            raise BadZipFile("Bad magic number for central directory")
        if len(centdir) != sizeCentralDir:
            # A short record would otherwise surface as struct.error.
            raise BadZipFile("Truncated central directory")
        centdir = struct.unpack(structCentralDir, centdir)
        if self.debug > 2:
            print(centdir)
        filename = fp.read(centdir[_CD_FILENAME_LENGTH])
        flags = centdir[5]
        if flags & 0x800:
            # UTF-8 file names extension
            filename = filename.decode('utf-8')
        else:
            # Historical ZIP filename encoding
            filename = filename.decode('cp437')
        # Create ZipInfo instance to store file information
        x = ZipInfo(filename)
        x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
        x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
        x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
        (x.create_version, x.create_system, x.extract_version, x.reserved,
         x.flag_bits, x.compress_type, t, d,
         x.CRC, x.compress_size, x.file_size) = centdir[1:12]
        if x.extract_version > MAX_EXTRACT_VERSION:
            raise NotImplementedError("zip file version %.1f" %
                                      (x.extract_version / 10))
        x.volume, x.internal_attr, x.external_attr = centdir[15:18]
        # Convert date/time code to (year, month, day, hour, min, sec)
        x._raw_time = t
        x.date_time = ((d >> 9) + 1980, (d >> 5) & 0xF, d & 0x1F,
                       t >> 11, (t >> 5) & 0x3F, (t & 0x1F) * 2)

        x._decodeExtra()
        x.header_offset = x.header_offset + concat
        self.filelist.append(x)
        self.NameToInfo[x.filename] = x

        # update total bytes read from central directory
        total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH]
                 + centdir[_CD_EXTRA_FIELD_LENGTH]
                 + centdir[_CD_COMMENT_LENGTH])

        if self.debug > 2:
            print("total", total)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001023
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001024
def namelist(self):
    """Return a new list with the file name of every archive member."""
    names = []
    for info in self.filelist:
        names.append(info.filename)
    return names
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001028
def infolist(self):
    """Return the list of ZipInfo instances for the files in the archive.

    The archive's own list object is returned, not a copy.
    """
    return self.filelist
1033
def printdir(self, file=None):
    """Print a table of contents for the zip file.

    Output goes to file, or to standard output when file is None.
    """
    heading = "%-46s %19s %12s" % ("File Name", "Modified ", "Size")
    print(heading, file=file)
    for entry in self.filelist:
        stamp = "%d-%02d-%02d %02d:%02d:%02d" % entry.date_time[:6]
        row = "%-46s %s %12d" % (entry.filename, stamp, entry.file_size)
        print(row, file=file)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001042
1043 def testzip(self):
Fred Drake484d7352000-10-02 21:14:52 +00001044 """Read all the files and check the CRC."""
Benjamin Peterson4cd6a952008-08-17 20:23:46 +00001045 chunk_size = 2 ** 20
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001046 for zinfo in self.filelist:
1047 try:
Benjamin Peterson4cd6a952008-08-17 20:23:46 +00001048 # Read by chunks, to avoid an OverflowError or a
1049 # MemoryError with very large embedded files.
Antoine Pitrou17babc52012-11-17 23:50:08 +01001050 with self.open(zinfo.filename, "r") as f:
1051 while f.read(chunk_size): # Check CRC-32
1052 pass
Georg Brandl4d540882010-10-28 06:42:33 +00001053 except BadZipFile:
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001054 return zinfo.filename
1055
def getinfo(self, name):
    """Return the instance of ZipInfo given 'name'.

    Raises KeyError if the archive has no member called name.
    """
    entry = self.NameToInfo.get(name)
    if entry is not None:
        return entry

    raise KeyError(
        'There is no item named %r in the archive' % name)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001064
def setpassword(self, pwd):
    """Set default password for encrypted files.

    A false value (None or b'') clears the default password.  Raises
    TypeError if a non-empty pwd is not a bytes object.
    """
    if pwd and not isinstance(pwd, bytes):
        raise TypeError("pwd: expected bytes, got %s" % type(pwd))
    self.pwd = pwd if pwd else None
Thomas Wouterscf297e42007-02-23 15:07:44 +00001073
@property
def comment(self):
    """The comment text associated with the ZIP file."""
    return self._comment

@comment.setter
def comment(self, comment):
    # Store a new archive comment and mark the archive as modified so the
    # comment is written out.  Raises TypeError unless comment is bytes.
    if not isinstance(comment, bytes):
        raise TypeError("comment: expected bytes, got %s" % type(comment))
    # A comment of exactly ZIP_MAX_COMMENT bytes fits as it is; only a
    # strictly longer one is cut (and reported when debugging is on).
    if len(comment) > ZIP_MAX_COMMENT:
        if self.debug:
            print('Archive comment is too long; truncating to %d bytes'
                  % ZIP_MAX_COMMENT)
        comment = comment[:ZIP_MAX_COMMENT]
    self._comment = comment
    self._didModify = True
1091
def read(self, name, pwd=None):
    """Return the complete contents of member name as bytes.

    pwd, if given, is the password used for an encrypted member.
    """
    with self.open(name, "r", pwd) as member:
        contents = member.read()
    return contents
Guido van Rossumd8faa362007-04-27 19:54:29 +00001096
def open(self, name, mode="r", pwd=None):
    """Return file-like object for 'name'.

    name is a member name or a ZipInfo instance, mode must be "r", "U" or
    "rU", and pwd is the password (bytes) for an encrypted member; the
    archive's default password is used when pwd is not given.
    """
    if mode not in ("r", "U", "rU"):
        raise RuntimeError('open() requires mode "r", "U", or "rU"')
    if pwd and not isinstance(pwd, bytes):
        raise TypeError("pwd: expected bytes, got %s" % type(pwd))
    if not self.fp:
        raise RuntimeError(
            "Attempt to read ZIP archive that was already closed")

    # Reuse the caller's file object if one was given to the constructor;
    # otherwise open a private handle on the archive.
    zef_file = self.fp if self._filePassed else io.open(self.filename, 'rb')

    try:
        # Make sure we have an info object
        zinfo = name if isinstance(name, ZipInfo) else self.getinfo(name)
        zef_file.seek(zinfo.header_offset, 0)

        # Skip the file header:
        raw_header = zef_file.read(sizeFileHeader)
        if raw_header[0:4] != stringFileHeader:
            raise BadZipFile("Bad magic number for file header")

        fheader = struct.unpack(structFileHeader, raw_header)
        fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
        if fheader[_FH_EXTRA_FIELD_LENGTH]:
            zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])

        if zinfo.flag_bits & 0x20:
            # Zip 2.7: compressed patched data
            raise NotImplementedError("compressed patched data (flag bit 5)")

        if zinfo.flag_bits & 0x40:
            # strong encryption
            raise NotImplementedError("strong encryption (flag bit 6)")

        # Flag bit 11 marks a UTF-8 file name; otherwise it is cp437.
        encoding = "utf-8" if zinfo.flag_bits & 0x800 else "cp437"
        fname_str = fname.decode(encoding)

        if fname_str != zinfo.orig_filename:
            raise BadZipFile(
                'File name in directory %r and header %r differ.'
                % (zinfo.orig_filename, fname))

        # check for encrypted flag & handle password
        zd = None
        if zinfo.flag_bits & 0x1:
            if not pwd:
                pwd = self.pwd
            if not pwd:
                raise RuntimeError("File %s is encrypted, password "
                                   "required for extraction" % name)

            zd = _ZipDecrypter(pwd)
            # The first 12 bytes in the cypher stream is an encryption header
            #  used to strengthen the algorithm. The first 11 bytes are
            #  completely random, while the 12th contains the MSB of the CRC,
            #  or the MSB of the file time depending on the header type
            #  and is used to check the correctness of the password.
            header = zef_file.read(12)
            decrypted = [zd(byte) for byte in header[0:12]]
            if zinfo.flag_bits & 0x8:
                # compare against the file type from extended local headers
                check_byte = (zinfo._raw_time >> 8) & 0xff
            else:
                # compare against the CRC otherwise
                check_byte = (zinfo.CRC >> 24) & 0xff
            if decrypted[11] != check_byte:
                raise RuntimeError("Bad password for file", name)

        return ZipExtFile(zef_file, mode, zinfo, zd,
                          close_fileobj=not self._filePassed)
    except BaseException:
        # Close only a handle that was opened here, then re-raise.
        if not self._filePassed:
            zef_file.close()
        raise
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001186
def extract(self, member, path=None, pwd=None):
    """Extract a single member and return the path it was written to.

    member may be a file name or a ZipInfo object.  The member is
    extracted under its full name, into the directory given by path or
    into the current working directory when path is None.  pwd is the
    password for an encrypted member.
    """
    info = member if isinstance(member, ZipInfo) else self.getinfo(member)
    destination = os.getcwd() if path is None else path
    return self._extract_member(info, destination, pwd)
1199 return self._extract_member(member, path, pwd)
1200
def extractall(self, path=None, members=None, pwd=None):
    """Extract several members, by default every member of the archive.

    path names the directory to extract into (the current working
    directory when None).  members is optional and must be a subset of
    the list returned by namelist().  pwd is the password for encrypted
    members.
    """
    wanted = self.namelist() if members is None else members
    for item in wanted:
        self.extract(item, path, pwd)
1212
1213 def _extract_member(self, member, targetpath, pwd):
1214 """Extract the ZipInfo object 'member' to a physical
1215 file on the path targetpath.
1216 """
1217 # build the destination pathname, replacing
1218 # forward slashes to platform specific separators.
Antoine Pitrou3c33e082009-05-04 21:21:36 +00001219 # Strip trailing path separator, unless it represents the root.
1220 if (targetpath[-1:] in (os.path.sep, os.path.altsep)
1221 and len(os.path.splitdrive(targetpath)[1]) > 1):
Christian Heimes790c8232008-01-07 21:14:23 +00001222 targetpath = targetpath[:-1]
1223
1224 # don't include leading "/" from file name if present
Martin v. Löwis59e47792009-01-24 14:10:07 +00001225 if member.filename[0] == '/':
Christian Heimes790c8232008-01-07 21:14:23 +00001226 targetpath = os.path.join(targetpath, member.filename[1:])
1227 else:
1228 targetpath = os.path.join(targetpath, member.filename)
1229
1230 targetpath = os.path.normpath(targetpath)
1231
1232 # Create all upper directories if necessary.
1233 upperdirs = os.path.dirname(targetpath)
1234 if upperdirs and not os.path.exists(upperdirs):
1235 os.makedirs(upperdirs)
1236
Martin v. Löwis59e47792009-01-24 14:10:07 +00001237 if member.filename[-1] == '/':
Martin v. Löwis70ccd162009-05-24 19:47:22 +00001238 if not os.path.isdir(targetpath):
1239 os.mkdir(targetpath)
Martin v. Löwis59e47792009-01-24 14:10:07 +00001240 return targetpath
1241
Antoine Pitrou17babc52012-11-17 23:50:08 +01001242 with self.open(member, pwd=pwd) as source, \
1243 open(targetpath, "wb") as target:
1244 shutil.copyfileobj(source, target)
Christian Heimes790c8232008-01-07 21:14:23 +00001245
1246 return targetpath
1247
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001248 def _writecheck(self, zinfo):
Fred Drake484d7352000-10-02 21:14:52 +00001249 """Check for errors before writing a file to the archive."""
Raymond Hettinger54f02222002-06-01 14:18:47 +00001250 if zinfo.filename in self.NameToInfo:
Tim Peterse1190062001-01-15 03:34:38 +00001251 if self.debug: # Warning for duplicate names
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001252 print("Duplicate name:", zinfo.filename)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001253 if self.mode not in ("w", "a"):
Collin Winterce36ad82007-08-30 01:19:48 +00001254 raise RuntimeError('write() requires mode "w" or "a"')
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001255 if not self.fp:
Collin Winterce36ad82007-08-30 01:19:48 +00001256 raise RuntimeError(
1257 "Attempt to write ZIP archive that was already closed")
Martin v. Löwisf6b16a42012-05-01 07:58:44 +02001258 _check_compression(zinfo.compress_type)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001259 if zinfo.file_size > ZIP64_LIMIT:
1260 if not self._allowZip64:
1261 raise LargeZipFile("Filesize would require ZIP64 extensions")
1262 if zinfo.header_offset > ZIP64_LIMIT:
1263 if not self._allowZip64:
Collin Winterce36ad82007-08-30 01:19:48 +00001264 raise LargeZipFile(
1265 "Zipfile size would require ZIP64 extensions")
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001266
def write(self, filename, arcname=None, compress_type=None):
    """Put the bytes from filename into the archive under the name
    arcname.

    arcname defaults to filename; any drive prefix and leading path
    separators are removed from it.  compress_type overrides the
    archive's default compression for this one member.  A directory is
    stored as an empty entry whose name ends in '/'.
    """
    if not self.fp:
        raise RuntimeError(
            "Attempt to write to ZIP archive that was already closed")

    st = os.stat(filename)
    isdir = stat.S_ISDIR(st.st_mode)
    date_time = time.localtime(st.st_mtime)[0:6]

    # Work out the member name.
    if arcname is None:
        arcname = filename
    arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
    while arcname[0] in (os.sep, os.altsep):
        arcname = arcname[1:]
    if isdir:
        arcname += '/'

    # Create ZipInfo instance to store file information
    zinfo = ZipInfo(arcname, date_time)
    zinfo.external_attr = (st[0] & 0xFFFF) << 16      # Unix attributes
    zinfo.compress_type = (self.compression if compress_type is None
                           else compress_type)
    zinfo.file_size = st.st_size
    zinfo.flag_bits = 0x00
    zinfo.header_offset = self.fp.tell()    # Start of header bytes
    if zinfo.compress_type == ZIP_LZMA:
        # Compressed data includes an end-of-stream (EOS) marker
        zinfo.flag_bits |= 0x02

    self._writecheck(zinfo)
    self._didModify = True

    if isdir:
        # Directories carry no data: register the entry and write only
        # its header.
        zinfo.file_size = 0
        zinfo.compress_size = 0
        zinfo.CRC = 0
        self.filelist.append(zinfo)
        self.NameToInfo[zinfo.filename] = zinfo
        self.fp.write(zinfo.FileHeader(False))
        return

    cmpr = _get_compressor(zinfo.compress_type)
    with open(filename, "rb") as src:
        # Must overwrite CRC and sizes with correct data later
        zinfo.CRC = checksum = 0
        zinfo.compress_size = packed_size = 0
        # Compressed size can be larger than uncompressed size
        zip64 = self._allowZip64 and \
                zinfo.file_size * 1.05 > ZIP64_LIMIT
        self.fp.write(zinfo.FileHeader(zip64))
        plain_size = 0
        while True:
            block = src.read(1024 * 8)
            if not block:
                break
            plain_size += len(block)
            checksum = crc32(block, checksum) & 0xffffffff
            if cmpr:
                block = cmpr.compress(block)
                packed_size += len(block)
            self.fp.write(block)

    if cmpr:
        # Emit whatever the compressor is still holding back.
        tail = cmpr.flush()
        packed_size += len(tail)
        self.fp.write(tail)
        zinfo.compress_size = packed_size
    else:
        zinfo.compress_size = plain_size
    zinfo.CRC = checksum
    zinfo.file_size = plain_size

    if not zip64 and self._allowZip64:
        if plain_size > ZIP64_LIMIT:
            raise RuntimeError('File size has increased during compressing')
        if packed_size > ZIP64_LIMIT:
            raise RuntimeError('Compressed size larger than uncompressed size')

    # Seek backwards and write file header (which will now include
    # correct CRC and file sizes)
    position = self.fp.tell()       # Preserve current position in file
    self.fp.seek(zinfo.header_offset, 0)
    self.fp.write(zinfo.FileHeader(zip64))
    self.fp.seek(position, 0)
    self.filelist.append(zinfo)
    self.NameToInfo[zinfo.filename] = zinfo
1354
Ronald Oussorenee5c8852010-02-07 20:24:02 +00001355 def writestr(self, zinfo_or_arcname, data, compress_type=None):
Guido van Rossum85825dc2007-08-27 17:03:28 +00001356 """Write a file into the archive. The contents is 'data', which
1357 may be either a 'str' or a 'bytes' instance; if it is a 'str',
1358 it is encoded as UTF-8 first.
1359 'zinfo_or_arcname' is either a ZipInfo instance or
Just van Rossumb083cb32002-12-12 12:23:32 +00001360 the name of the file in the archive."""
Guido van Rossum85825dc2007-08-27 17:03:28 +00001361 if isinstance(data, str):
1362 data = data.encode("utf-8")
Just van Rossumb083cb32002-12-12 12:23:32 +00001363 if not isinstance(zinfo_or_arcname, ZipInfo):
1364 zinfo = ZipInfo(filename=zinfo_or_arcname,
Guido van Rossum7736b5b2008-01-15 21:44:53 +00001365 date_time=time.localtime(time.time())[:6])
Just van Rossumb083cb32002-12-12 12:23:32 +00001366 zinfo.compress_type = self.compression
Antoine Pitrou6e1df8d2008-07-25 19:58:18 +00001367 zinfo.external_attr = 0o600 << 16
Just van Rossumb083cb32002-12-12 12:23:32 +00001368 else:
1369 zinfo = zinfo_or_arcname
Guido van Rossumb5a755e2007-07-18 18:15:48 +00001370
1371 if not self.fp:
1372 raise RuntimeError(
1373 "Attempt to write to ZIP archive that was already closed")
1374
Guido van Rossum85825dc2007-08-27 17:03:28 +00001375 zinfo.file_size = len(data) # Uncompressed size
1376 zinfo.header_offset = self.fp.tell() # Start of header data
Ronald Oussorenee5c8852010-02-07 20:24:02 +00001377 if compress_type is not None:
1378 zinfo.compress_type = compress_type
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +02001379 if zinfo.compress_type == ZIP_LZMA:
1380 # Compressed data includes an end-of-stream (EOS) marker
1381 zinfo.flag_bits |= 0x02
Ronald Oussorenee5c8852010-02-07 20:24:02 +00001382
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001383 self._writecheck(zinfo)
1384 self._didModify = True
Christian Heimesd5e2b6f2008-03-19 21:50:51 +00001385 zinfo.CRC = crc32(data) & 0xffffffff # CRC-32 checksum
Martin v. Löwisf6b16a42012-05-01 07:58:44 +02001386 co = _get_compressor(zinfo.compress_type)
1387 if co:
Guido van Rossum85825dc2007-08-27 17:03:28 +00001388 data = co.compress(data) + co.flush()
1389 zinfo.compress_size = len(data) # Compressed size
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001390 else:
1391 zinfo.compress_size = zinfo.file_size
Serhiy Storchaka182d7cd2013-01-15 00:31:39 +02001392 zip64 = zinfo.file_size > ZIP64_LIMIT or \
1393 zinfo.compress_size > ZIP64_LIMIT
1394 if zip64 and not self._allowZip64:
1395 raise LargeZipFile("Filesize would require ZIP64 extensions")
1396 self.fp.write(zinfo.FileHeader(zip64))
Guido van Rossum85825dc2007-08-27 17:03:28 +00001397 self.fp.write(data)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001398 if zinfo.flag_bits & 0x08:
Tim Peterse1190062001-01-15 03:34:38 +00001399 # Write CRC and file sizes after the file data
Serhiy Storchaka182d7cd2013-01-15 00:31:39 +02001400 fmt = '<LQQ' if zip64 else '<LLL'
1401 self.fp.write(struct.pack(fmt, zinfo.CRC, zinfo.compress_size,
Tim Peterse1190062001-01-15 03:34:38 +00001402 zinfo.file_size))
Serhiy Storchaka182d7cd2013-01-15 00:31:39 +02001403 self.fp.flush()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001404 self.filelist.append(zinfo)
1405 self.NameToInfo[zinfo.filename] = zinfo
1406
    def __del__(self):
        """Call the "close()" method in case the user forgot.

        Runs when the object is finalized; all the work is delegated to
        close().
        """
        self.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001410
    def close(self):
        """Close the file, and for mode "w" and "a" write the ending
        records.

        Does nothing if self.fp is already None.  When the archive was
        opened in mode "w" or "a" and has been modified, the central
        directory and the end-of-archive record(s) are written first.
        self.fp is reset to None in all cases (even if writing the ending
        records raises), and the underlying file object is closed unless
        self._filePassed is set.
        """
        if self.fp is None:
            return

        try:
            if self.mode in ("w", "a") and self._didModify: # write ending records
                count = 0
                pos1 = self.fp.tell()   # start of the central directory
                for zinfo in self.filelist:         # write central directory
                    count = count + 1
                    dt = zinfo.date_time
                    # Pack the timestamp into DOS-style date and time words:
                    # the year is stored relative to 1980 and the seconds
                    # are halved.
                    dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
                    dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
                    # Values exceeding ZIP64_LIMIT are collected in 'extra'
                    # and replaced by the 0xffffffff placeholder in the
                    # fixed-size record.
                    extra = []
                    if zinfo.file_size > ZIP64_LIMIT \
                       or zinfo.compress_size > ZIP64_LIMIT:
                        extra.append(zinfo.file_size)
                        extra.append(zinfo.compress_size)
                        file_size = 0xffffffff
                        compress_size = 0xffffffff
                    else:
                        file_size = zinfo.file_size
                        compress_size = zinfo.compress_size

                    if zinfo.header_offset > ZIP64_LIMIT:
                        extra.append(zinfo.header_offset)
                        header_offset = 0xffffffff
                    else:
                        header_offset = zinfo.header_offset

                    extra_data = zinfo.extra
                    min_version = 0
                    if extra:
                        # Append a ZIP64 field to the extra's
                        # (header id 1, payload of one 8-byte value per
                        # collected item, placed before the existing data)
                        extra_data = struct.pack(
                                '<HH' + 'Q'*len(extra),
                                1, 8*len(extra), *extra) + extra_data

                        min_version = ZIP64_VERSION

                    # Some compression methods raise the minimum version.
                    if zinfo.compress_type == ZIP_BZIP2:
                        min_version = max(BZIP2_VERSION, min_version)
                    elif zinfo.compress_type == ZIP_LZMA:
                        min_version = max(LZMA_VERSION, min_version)

                    # Never advertise a version lower than the features
                    # used by this entry require.
                    extract_version = max(min_version, zinfo.extract_version)
                    create_version = max(min_version, zinfo.create_version)
                    try:
                        filename, flag_bits = zinfo._encodeFilenameFlags()
                        centdir = struct.pack(structCentralDir,
                            stringCentralDir, create_version,
                            zinfo.create_system, extract_version, zinfo.reserved,
                            flag_bits, zinfo.compress_type, dostime, dosdate,
                            zinfo.CRC, compress_size, file_size,
                            len(filename), len(extra_data), len(zinfo.comment),
                            0, zinfo.internal_attr, zinfo.external_attr,
                            header_offset)
                    except DeprecationWarning:
                        # Debugging aid: dump the values that were about to
                        # be packed to stderr, then let the warning
                        # propagate.
                        print((structCentralDir, stringCentralDir, create_version,
                            zinfo.create_system, extract_version, zinfo.reserved,
                            zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
                            zinfo.CRC, compress_size, file_size,
                            len(zinfo.filename), len(extra_data), len(zinfo.comment),
                            0, zinfo.internal_attr, zinfo.external_attr,
                            header_offset), file=sys.stderr)
                        raise
                    # The fixed-size record is followed by its variable
                    # parts: file name, extra data and comment.
                    self.fp.write(centdir)
                    self.fp.write(filename)
                    self.fp.write(extra_data)
                    self.fp.write(zinfo.comment)

                pos2 = self.fp.tell()   # end of the central directory
                # Write end-of-zip-archive record
                centDirCount = count
                centDirSize = pos2 - pos1
                centDirOffset = pos1
                if (centDirCount >= ZIP_FILECOUNT_LIMIT or
                    centDirOffset > ZIP64_LIMIT or
                    centDirSize > ZIP64_LIMIT):
                    # Need to write the ZIP64 end-of-archive records
                    # NOTE(review): 44, 45, 45 are presumably the record
                    # size and the create/extract versions required by the
                    # ZIP64 format -- confirm against structEndArchive64.
                    zip64endrec = struct.pack(
                            structEndArchive64, stringEndArchive64,
                            44, 45, 45, 0, 0, centDirCount, centDirCount,
                            centDirSize, centDirOffset)
                    self.fp.write(zip64endrec)

                    # pos2 is the offset at which the ZIP64 end record
                    # above was written.
                    zip64locrec = struct.pack(
                            structEndArchive64Locator,
                            stringEndArchive64Locator, 0, pos2, 1)
                    self.fp.write(zip64locrec)
                    # Clamp the values stored in the classic end record;
                    # the full values are in the ZIP64 record.
                    centDirCount = min(centDirCount, 0xFFFF)
                    centDirSize = min(centDirSize, 0xFFFFFFFF)
                    centDirOffset = min(centDirOffset, 0xFFFFFFFF)

                endrec = struct.pack(structEndArchive, stringEndArchive,
                                    0, 0, centDirCount, centDirCount,
                                    centDirSize, centDirOffset, len(self._comment))
                self.fp.write(endrec)
                self.fp.write(self._comment)
                self.fp.flush()
        finally:
            # Mark the archive as closed before touching the file object so
            # that a failing fp.close() cannot leave self.fp set.
            fp = self.fp
            self.fp = None
            if not self._filePassed:
                fp.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001518
1519
class PyZipFile(ZipFile):
    """Class to create ZIP archives with Python library files and packages."""

    def __init__(self, file, mode="r", compression=ZIP_STORED,
                 allowZip64=False, optimize=-1):
        """Open the archive like ZipFile does and remember 'optimize'.

        'optimize' selects which compiled form writepy() adds: -1 (the
        default) keeps the legacy behaviour of using whatever compiled
        file is present and up to date; any other value is passed to
        py_compile as the optimization level when compilation is needed.
        """
        ZipFile.__init__(self, file, mode=mode, compression=compression,
                         allowZip64=allowZip64)
        self._optimize = optimize

    def writepy(self, pathname, basename=""):
        """Add all files from "pathname" to the ZIP archive.

        If pathname is a package directory, search the directory and
        all package subdirectories recursively for all *.py and enter
        the modules into the archive.  If pathname is a plain
        directory, listdir *.py and enter all modules.  Else, pathname
        must be a Python *.py file and the module will be put into the
        archive.  Added modules are module.pyo or module.pyc; this
        method will compile the module.py if necessary, and falls back
        to adding the module.py source when compilation fails.

        'basename' is the archive directory the entries are placed in.
        Raises RuntimeError if pathname is a file not ending in ".py".
        """
        name = os.path.basename(pathname)
        if os.path.isdir(pathname):
            initname = os.path.join(pathname, "__init__.py")
            if os.path.isfile(initname):
                # This is a package directory, add it
                if basename:
                    basename = "%s/%s" % (basename, name)
                else:
                    basename = name
                if self.debug:
                    print("Adding package in", pathname, "as", basename)
                fname, arcname = self._get_codename(initname[0:-3], basename)
                if self.debug:
                    print("Adding", arcname)
                self.write(fname, arcname)
                dirlist = os.listdir(pathname)
                # __init__.py has just been added; do not add it twice.
                dirlist.remove("__init__.py")
                # Add all *.py files and package subdirectories
                for filename in dirlist:
                    path = os.path.join(pathname, filename)
                    ext = os.path.splitext(filename)[1]
                    if os.path.isdir(path):
                        if os.path.isfile(os.path.join(path, "__init__.py")):
                            # This is a package directory, add it
                            self.writepy(path, basename)  # Recursive call
                    elif ext == ".py":
                        fname, arcname = self._get_codename(path[0:-3],
                                                            basename)
                        if self.debug:
                            print("Adding", arcname)
                        self.write(fname, arcname)
            else:
                # This is NOT a package directory, add its files at top level
                if self.debug:
                    print("Adding files from directory", pathname)
                for filename in os.listdir(pathname):
                    path = os.path.join(pathname, filename)
                    ext = os.path.splitext(filename)[1]
                    if ext == ".py":
                        fname, arcname = self._get_codename(path[0:-3],
                                                            basename)
                        if self.debug:
                            print("Adding", arcname)
                        self.write(fname, arcname)
        else:
            if not pathname.endswith(".py"):
                raise RuntimeError(
                    'Files added with writepy() must end with ".py"')
            fname, arcname = self._get_codename(pathname[0:-3], basename)
            if self.debug:
                print("Adding file", arcname)
            self.write(fname, arcname)

    def _get_codename(self, pathname, basename):
        """Return (filename, archivename) for the path.

        Given a module name path (without the ".py" suffix), return the
        file to read and the name to store it under, compiling if
        necessary.  For example, given /python/lib/string with an
        up-to-date legacy string.pyc next to it, return
        (/python/lib/string.pyc, string.pyc).  The archive name is
        prefixed with "basename/" when basename is non-empty.  If the
        module cannot be compiled, the .py source itself is returned.
        """
        def _compile(file, optimize=-1):
            """Byte-compile 'file'; return True on success, else False."""
            import py_compile
            if self.debug:
                print("Compiling", file)
            try:
                py_compile.compile(file, doraise=True, optimize=optimize)
            except py_compile.PyCompileError as err:
                # Report the problem and let the caller fall back to the
                # source file instead of aborting the whole archive.
                print(err.msg)
                return False
            return True

        file_py = pathname + ".py"
        file_pyc = pathname + ".pyc"
        file_pyo = pathname + ".pyo"
        # Second argument is imp's debug_override: True asks for the
        # __pycache__ .pyc name, False for the optimized one.
        pycache_pyc = imp.cache_from_source(file_py, True)
        pycache_pyo = imp.cache_from_source(file_py, False)

        def _up_to_date(candidate):
            """True if 'candidate' exists and is not older than the source."""
            return (os.path.isfile(candidate) and
                    os.stat(candidate).st_mtime >= os.stat(file_py).st_mtime)

        if self._optimize == -1:
            # legacy mode: use whatever file is present
            if _up_to_date(file_pyo):
                # Use .pyo file.
                arcname = fname = file_pyo
            elif _up_to_date(file_pyc):
                # Use .pyc file.
                arcname = fname = file_pyc
            elif _up_to_date(pycache_pyc):
                # Use the __pycache__/*.pyc file, but write it to the legacy pyc
                # file name in the archive.
                fname = pycache_pyc
                arcname = file_pyc
            elif _up_to_date(pycache_pyo):
                # Use the __pycache__/*.pyo file, but write it to the legacy pyo
                # file name in the archive.
                fname = pycache_pyo
                arcname = file_pyo
            else:
                # Compile py into PEP 3147 pyc file.
                if _compile(file_py):
                    fname = (pycache_pyc if __debug__ else pycache_pyo)
                    arcname = (file_pyc if __debug__ else file_pyo)
                else:
                    fname = arcname = file_py
        else:
            # new mode: use given optimization level
            if self._optimize == 0:
                fname = pycache_pyc
                arcname = file_pyc
            else:
                fname = pycache_pyo
                arcname = file_pyo
            if not _up_to_date(fname):
                if not _compile(file_py, optimize=self._optimize):
                    fname = arcname = file_py
        archivename = os.path.split(arcname)[1]
        if basename:
            archivename = "%s/%s" % (basename, archivename)
        return (fname, archivename)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001662
1663
def main(args = None):
    """Run the module's small command-line interface.

    'args' is the argument list to interpret; when None, sys.argv[1:]
    is used.  The first argument selects the action:

      -l ARCHIVE          list the archive's contents
      -t ARCHIVE          test the archive and report a corrupted member
      -e ARCHIVE TARGET   extract the archive into directory TARGET
      -c ARCHIVE SRC...   create the archive from files and directories

    On a missing/unknown action or a wrong number of arguments the
    usage text is printed and the process exits with status 1.
    """
    import textwrap
    USAGE=textwrap.dedent("""\
        Usage:
            zipfile.py -l zipfile.zip # Show listing of a zipfile
            zipfile.py -t zipfile.zip # Test if a zipfile is valid
            zipfile.py -e zipfile.zip target # Extract zipfile into target dir
            zipfile.py -c zipfile.zip src ... # Create zipfile from sources
        """)
    if args is None:
        args = sys.argv[1:]

    if not args or args[0] not in ('-l', '-c', '-e', '-t'):
        print(USAGE)
        sys.exit(1)

    if args[0] == '-l':
        if len(args) != 2:
            print(USAGE)
            sys.exit(1)
        with ZipFile(args[1], 'r') as zf:
            zf.printdir()

    elif args[0] == '-t':
        if len(args) != 2:
            print(USAGE)
            sys.exit(1)
        with ZipFile(args[1], 'r') as zf:
            badfile = zf.testzip()
        if badfile:
            print("The following enclosed file is corrupted: {!r}".format(badfile))
        print("Done testing")

    elif args[0] == '-e':
        if len(args) != 3:
            print(USAGE)
            sys.exit(1)

        # Let the archive object do the extraction: unlike a hand-written
        # open()/write() loop it copes with directory entries (names
        # ending in "/") and does not blindly join member names onto the
        # target directory.
        with ZipFile(args[1], 'r') as zf:
            zf.extractall(args[2])

    elif args[0] == '-c':
        if len(args) < 3:
            print(USAGE)
            sys.exit(1)

        def addToZip(zf, path, zippath):
            """Add file 'path', or a directory tree, under 'zippath'."""
            if os.path.isfile(path):
                zf.write(path, zippath, ZIP_DEFLATED)
            elif os.path.isdir(path):
                for nm in os.listdir(path):
                    addToZip(zf,
                            os.path.join(path, nm), os.path.join(zippath, nm))
            # else: ignore

        with ZipFile(args[1], 'w', allowZip64=True) as zf:
            for src in args[2:]:
                addToZip(zf, src, os.path.basename(src))
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001733
# Run the command-line interface only when executed as a script,
# not when the module is imported.
if __name__ == "__main__":
    main()