blob: a2791cd30eb45fd0f8fc7fad7779d958329fb5ae [file] [log] [blame]
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001"""
2Read and write ZIP files.
Guido van Rossumd6ca5462007-05-22 01:29:33 +00003
4XXX references to utf-8 need further investigation.
Thomas Wouters0e3f5912006-08-11 14:57:12 +00005"""
Antoine Pitroua32f9a22010-01-27 21:18:57 +00006import io
Barry Warsaw28a691b2010-04-17 00:19:56 +00007import os
Antoine Pitroua32f9a22010-01-27 21:18:57 +00008import re
Barry Warsaw28a691b2010-04-17 00:19:56 +00009import imp
10import sys
11import time
12import stat
13import shutil
14import struct
15import binascii
16
Guido van Rossum32abe6f2000-03-31 17:30:02 +000017
18try:
Tim Peterse1190062001-01-15 03:34:38 +000019 import zlib # We may need its compression method
Christian Heimesd5e2b6f2008-03-19 21:50:51 +000020 crc32 = zlib.crc32
Guido van Rossum9c673f32001-04-10 15:37:12 +000021except ImportError:
Guido van Rossum32abe6f2000-03-31 17:30:02 +000022 zlib = None
Christian Heimesd5e2b6f2008-03-19 21:50:51 +000023 crc32 = binascii.crc32
Guido van Rossum32abe6f2000-03-31 17:30:02 +000024
Martin v. Löwisf6b16a42012-05-01 07:58:44 +020025try:
26 import bz2 # We may need its compression method
27except ImportError:
28 bz2 = None
29
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +020030try:
31 import lzma # We may need its compression method
32except ImportError:
33 lzma = None
34
Martin v. Löwisf6b16a42012-05-01 07:58:44 +020035__all__ = ["BadZipFile", "BadZipfile", "error",
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +020036 "ZIP_STORED", "ZIP_DEFLATED", "ZIP_BZIP2", "ZIP_LZMA",
Georg Brandl4d540882010-10-28 06:42:33 +000037 "is_zipfile", "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile"]
Skip Montanaro40fc1602001-03-01 04:27:19 +000038
class BadZipFile(Exception):
    """Raised for ZIP archives this module cannot handle.

    For example, it is raised for archives that span multiple disks.
    """
Thomas Wouters0e3f5912006-08-11 14:57:12 +000041
42
class LargeZipFile(Exception):
    """Raised when writing a zipfile that needs ZIP64 extensions while
    those extensions are disabled.
    """
48
# Pre-3.2 compatibility names: both aliases refer to the BadZipFile class.
error = BadZipfile = BadZipFile
50
Guido van Rossum32abe6f2000-03-31 17:30:02 +000051
# Size thresholds.  Sizes above ZIP64_LIMIT make the header writer fall back
# to the ZIP64 extension.
ZIP64_LIMIT = 0x7FFFFFFF
ZIP_FILECOUNT_LIMIT = 0x10000
ZIP_MAX_COMMENT = 0xFFFF

# constants for Zip file compression methods
ZIP_STORED = 0
ZIP_DEFLATED = 8
ZIP_BZIP2 = 12
ZIP_LZMA = 14
# Other ZIP compression methods not supported

# Format version numbers written into headers.
DEFAULT_VERSION = 20
ZIP64_VERSION = 45
BZIP2_VERSION = 46
LZMA_VERSION = 63
# we recognize (but not necessarily support) all features up to that version
MAX_EXTRACT_VERSION = 63
Martin v. Löwisf6b16a42012-05-01 07:58:44 +020069
Martin v. Löwisb09b8442008-07-03 14:13:42 +000070# Below are some formats and associated data for reading/writing headers using
71# the struct module. The names and structures of headers/records are those used
72# in the PKWARE description of the ZIP file format:
73# http://www.pkware.com/documents/casestudies/APPNOTE.TXT
74# (URL valid as of January 2008)
Thomas Wouters0e3f5912006-08-11 14:57:12 +000075
Martin v. Löwisb09b8442008-07-03 14:13:42 +000076# The "end of central directory" structure, magic number, size, and indices
77# (section V.I in the format document)
Georg Brandl2ee470f2008-07-16 12:55:28 +000078structEndArchive = b"<4s4H2LH"
79stringEndArchive = b"PK\005\006"
80sizeEndCentDir = struct.calcsize(structEndArchive)
Martin v. Löwisb09b8442008-07-03 14:13:42 +000081
82_ECD_SIGNATURE = 0
83_ECD_DISK_NUMBER = 1
84_ECD_DISK_START = 2
85_ECD_ENTRIES_THIS_DISK = 3
86_ECD_ENTRIES_TOTAL = 4
87_ECD_SIZE = 5
88_ECD_OFFSET = 6
89_ECD_COMMENT_SIZE = 7
90# These last two indices are not part of the structure as defined in the
91# spec, but they are used internally by this module as a convenience
92_ECD_COMMENT = 8
93_ECD_LOCATION = 9
94
95# The "central directory" structure, magic number, size, and indices
96# of entries in the structure (section V.F in the format document)
97structCentralDir = "<4s4B4HL2L5H2L"
Georg Brandl2ee470f2008-07-16 12:55:28 +000098stringCentralDir = b"PK\001\002"
Martin v. Löwisb09b8442008-07-03 14:13:42 +000099sizeCentralDir = struct.calcsize(structCentralDir)
100
Fred Drake3e038e52001-02-28 17:56:26 +0000101# indexes of entries in the central directory structure
102_CD_SIGNATURE = 0
103_CD_CREATE_VERSION = 1
104_CD_CREATE_SYSTEM = 2
105_CD_EXTRACT_VERSION = 3
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000106_CD_EXTRACT_SYSTEM = 4
Fred Drake3e038e52001-02-28 17:56:26 +0000107_CD_FLAG_BITS = 5
108_CD_COMPRESS_TYPE = 6
109_CD_TIME = 7
110_CD_DATE = 8
111_CD_CRC = 9
112_CD_COMPRESSED_SIZE = 10
113_CD_UNCOMPRESSED_SIZE = 11
114_CD_FILENAME_LENGTH = 12
115_CD_EXTRA_FIELD_LENGTH = 13
116_CD_COMMENT_LENGTH = 14
117_CD_DISK_NUMBER_START = 15
118_CD_INTERNAL_FILE_ATTRIBUTES = 16
119_CD_EXTERNAL_FILE_ATTRIBUTES = 17
120_CD_LOCAL_HEADER_OFFSET = 18
121
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000122# The "local file header" structure, magic number, size, and indices
123# (section V.A in the format document)
124structFileHeader = "<4s2B4HL2L2H"
Georg Brandl2ee470f2008-07-16 12:55:28 +0000125stringFileHeader = b"PK\003\004"
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000126sizeFileHeader = struct.calcsize(structFileHeader)
127
Fred Drake3e038e52001-02-28 17:56:26 +0000128_FH_SIGNATURE = 0
129_FH_EXTRACT_VERSION = 1
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000130_FH_EXTRACT_SYSTEM = 2
Fred Drake3e038e52001-02-28 17:56:26 +0000131_FH_GENERAL_PURPOSE_FLAG_BITS = 3
132_FH_COMPRESSION_METHOD = 4
133_FH_LAST_MOD_TIME = 5
134_FH_LAST_MOD_DATE = 6
135_FH_CRC = 7
136_FH_COMPRESSED_SIZE = 8
137_FH_UNCOMPRESSED_SIZE = 9
138_FH_FILENAME_LENGTH = 10
139_FH_EXTRA_FIELD_LENGTH = 11
140
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000141# The "Zip64 end of central directory locator" structure, magic number, and size
Georg Brandl2ee470f2008-07-16 12:55:28 +0000142structEndArchive64Locator = "<4sLQL"
143stringEndArchive64Locator = b"PK\x06\x07"
144sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator)
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000145
146# The "Zip64 end of central directory" record, magic number, size, and indices
147# (section V.G in the format document)
Georg Brandl2ee470f2008-07-16 12:55:28 +0000148structEndArchive64 = "<4sQ2H2L4Q"
149stringEndArchive64 = b"PK\x06\x06"
150sizeEndCentDir64 = struct.calcsize(structEndArchive64)
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000151
152_CD64_SIGNATURE = 0
153_CD64_DIRECTORY_RECSIZE = 1
154_CD64_CREATE_VERSION = 2
155_CD64_EXTRACT_VERSION = 3
156_CD64_DISK_NUMBER = 4
157_CD64_DISK_NUMBER_START = 5
158_CD64_NUMBER_ENTRIES_THIS_DISK = 6
159_CD64_NUMBER_ENTRIES_TOTAL = 7
160_CD64_DIRECTORY_SIZE = 8
161_CD64_OFFSET_START_CENTDIR = 9
162
def _check_zipfile(fp):
    """Return True if an end-of-central-directory record is found in *fp*.

    An OSError raised while looking for the record is treated as
    "not a ZIP file" and yields False.
    """
    try:
        found = bool(_EndRecData(fp))   # truthy: file has correct magic number
    except OSError:
        found = False
    return found
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000170
def is_zipfile(filename):
    """Quickly see if a file is a ZIP file by checking the magic number.

    The filename argument may be a file or file-like object too.
    Any OSError (for instance, the file cannot be opened) gives False.
    """
    try:
        if hasattr(filename, "read"):
            # Already an open file-like object: inspect it directly.
            return _check_zipfile(fp=filename)
        with open(filename, "rb") as fp:
            return _check_zipfile(fp)
    except OSError:
        return False
186
def _EndRecData64(fpin, offset, endrec):
    """
    Read the ZIP64 end-of-archive records and use that to update endrec

    fpin is a seekable binary file object, offset is the position of the
    classic "end of central directory" record relative to the end of the
    file, and endrec is the list built from that record.

    Returns endrec.  It is returned unchanged when no ZIP64 locator/record is
    present (or the file is too short to hold one); otherwise its entries are
    overwritten in place with the ZIP64 values.

    Raises BadZipFile if the locator describes a multi-disk archive.
    """
    try:
        fpin.seek(offset - sizeEndCentDir64Locator, 2)
    except OSError:
        # If the seek fails, the file is not large enough to contain a ZIP64
        # end-of-archive record, so just return the end record we were given.
        return endrec

    data = fpin.read(sizeEndCentDir64Locator)
    if len(data) != sizeEndCentDir64Locator:
        # Short read (e.g. a file object that clamps the seek to offset 0):
        # there is no room for a locator, so there is nothing to update.
        return endrec
    sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, data)
    if sig != stringEndArchive64Locator:
        return endrec

    if diskno != 0 or disks != 1:
        raise BadZipFile("zipfiles that span multiple disks are not supported")

    # Assume no 'zip64 extensible data'
    fpin.seek(offset - sizeEndCentDir64Locator - sizeEndCentDir64, 2)
    data = fpin.read(sizeEndCentDir64)
    if len(data) != sizeEndCentDir64:
        # Truncated file: the ZIP64 record cannot be unpacked.
        return endrec
    sig, sz, create_version, read_version, disk_num, disk_dir, \
        dircount, dircount2, dirsize, diroffset = \
        struct.unpack(structEndArchive64, data)
    if sig != stringEndArchive64:
        return endrec

    # Update the original endrec using data from the ZIP64 record
    endrec[_ECD_SIGNATURE] = sig
    endrec[_ECD_DISK_NUMBER] = disk_num
    endrec[_ECD_DISK_START] = disk_dir
    endrec[_ECD_ENTRIES_THIS_DISK] = dircount
    endrec[_ECD_ENTRIES_TOTAL] = dircount2
    endrec[_ECD_SIZE] = dirsize
    endrec[_ECD_OFFSET] = diroffset
    return endrec
224
225
def _EndRecData(fpin):
    """Return data from the "End of Central Directory" record, or None.

    The data is a list of the nine items in the ZIP "End of central dir"
    record followed by a tenth item, the file seek offset of this record.

    None is returned when no usable record is found, including when the
    file is too short to hold a complete record."""

    # Determine file size
    fpin.seek(0, 2)
    filesize = fpin.tell()

    # Check to see if this is ZIP file with no archive comment (the
    # "end of central directory" structure should be the last item in the
    # file if this is the case).
    try:
        fpin.seek(-sizeEndCentDir, 2)
    except OSError:
        return None
    data = fpin.read()
    # The length check guards against file objects that clamp the seek
    # instead of failing, which would hand struct.unpack a short buffer.
    if (len(data) == sizeEndCentDir and
            data[0:4] == stringEndArchive and
            data[-2:] == b"\000\000"):
        # the signature is correct and there's no comment, unpack structure
        endrec = list(struct.unpack(structEndArchive, data))

        # Append a blank comment and record start offset
        endrec.append(b"")
        endrec.append(filesize - sizeEndCentDir)

        # Try to read the "Zip64 end of central directory" structure
        return _EndRecData64(fpin, -sizeEndCentDir, endrec)

    # Either this is not a ZIP file, or it is a ZIP file with an archive
    # comment.  Search the end of the file for the "end of central directory"
    # record signature. The comment is the last item in the ZIP file and may be
    # up to 64K long.  It is assumed that the "end of central directory" magic
    # number does not appear in the comment.
    maxCommentStart = max(filesize - (1 << 16) - sizeEndCentDir, 0)
    fpin.seek(maxCommentStart, 0)
    data = fpin.read()
    start = data.rfind(stringEndArchive)
    if start >= 0:
        # found the magic number; attempt to unpack and interpret
        recData = data[start:start+sizeEndCentDir]
        if len(recData) != sizeEndCentDir:
            # The signature sits too close to the end of the file for a
            # complete record to follow it.
            return None
        endrec = list(struct.unpack(structEndArchive, recData))
        commentSize = endrec[_ECD_COMMENT_SIZE] #as claimed by the zip file
        comment = data[start+sizeEndCentDir:start+sizeEndCentDir+commentSize]
        endrec.append(comment)
        endrec.append(maxCommentStart + start)

        # Try to read the "Zip64 end of central directory" structure
        return _EndRecData64(fpin, maxCommentStart + start - filesize,
                             endrec)

    # Unable to find a valid end of central directory structure
    return None
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000280
Fred Drake484d7352000-10-02 21:14:52 +0000281
class ZipInfo (object):
    """Class with attributes describing each file in the ZIP archive."""

    __slots__ = (
            'orig_filename',
            'filename',
            'date_time',
            'compress_type',
            'comment',
            'extra',
            'create_system',
            'create_version',
            'extract_version',
            'reserved',
            'flag_bits',
            'volume',
            'internal_attr',
            'external_attr',
            'header_offset',
            'CRC',
            'compress_size',
            'file_size',
            '_raw_time',
        )

    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
        """Create an entry named *filename* with the given modification time.

        date_time is a (year, month, day, hour, min, sec) sequence.
        Raises ValueError if the year is before 1980.
        """
        self.orig_filename = filename   # Original file name in archive

        # Terminate the file name at the first null byte.  Null bytes in file
        # names are used as tricks by viruses in archives.
        null_byte = filename.find(chr(0))
        if null_byte >= 0:
            filename = filename[0:null_byte]
        # This is used to ensure paths in generated ZIP files always use
        # forward slashes as the directory separator, as required by the
        # ZIP format specification.
        if os.sep != "/" and os.sep in filename:
            filename = filename.replace(os.sep, "/")

        self.filename = filename        # Normalized file name
        self.date_time = date_time      # year, month, day, hour, min, sec

        if date_time[0] < 1980:
            raise ValueError('ZIP does not support timestamps before 1980')

        # Standard values:
        self.compress_type = ZIP_STORED # Type of compression for the file
        self.comment = b""              # Comment for each file
        self.extra = b""                # ZIP extra data
        if sys.platform == 'win32':
            self.create_system = 0          # System which created ZIP archive
        else:
            # Assume everything else is unix-y
            self.create_system = 3          # System which created ZIP archive
        self.create_version = DEFAULT_VERSION  # Version which created ZIP archive
        self.extract_version = DEFAULT_VERSION # Version needed to extract archive
        self.reserved = 0               # Must be zero
        self.flag_bits = 0              # ZIP flag bits
        self.volume = 0                 # Volume number of file header
        self.internal_attr = 0          # Internal attributes
        self.external_attr = 0          # External file attributes
        # Other attributes are set by class ZipFile:
        # header_offset         Byte offset to the file header
        # CRC                   CRC-32 of the uncompressed file
        # compress_size         Size of the compressed file
        # file_size             Size of the uncompressed file

    def FileHeader(self):
        """Return the per-file header as bytes.

        As a side effect, extract_version and create_version are raised to
        the minimum version required by the entry's size and compression
        type.
        """
        dt = self.date_time
        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
        if self.flag_bits & 0x08:
            # Set these to zero because we write them after the file data
            CRC = compress_size = file_size = 0
        else:
            CRC = self.CRC
            compress_size = self.compress_size
            file_size = self.file_size

        extra = self.extra

        min_version = 0
        if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
            # File is larger than what fits into a 4 byte integer,
            # fall back to the ZIP64 extension
            fmt = '<HHQQ'
            extra = extra + struct.pack(fmt,
                    1, struct.calcsize(fmt)-4, file_size, compress_size)
            file_size = 0xffffffff
            compress_size = 0xffffffff
            min_version = ZIP64_VERSION

        if self.compress_type == ZIP_BZIP2:
            min_version = max(BZIP2_VERSION, min_version)
        elif self.compress_type == ZIP_LZMA:
            min_version = max(LZMA_VERSION, min_version)

        self.extract_version = max(min_version, self.extract_version)
        self.create_version = max(min_version, self.create_version)
        filename, flag_bits = self._encodeFilenameFlags()
        header = struct.pack(structFileHeader, stringFileHeader,
                 self.extract_version, self.reserved, flag_bits,
                 self.compress_type, dostime, dosdate, CRC,
                 compress_size, file_size,
                 len(filename), len(extra))
        return header + filename + extra

    def _encodeFilenameFlags(self):
        """Return (encoded filename, flag bits).

        The name is encoded as ASCII when possible; otherwise it is encoded
        as UTF-8 and bit 0x800 is set in the returned flags.
        """
        try:
            return self.filename.encode('ascii'), self.flag_bits
        except UnicodeEncodeError:
            return self.filename.encode('utf-8'), self.flag_bits | 0x800

    def _decodeExtra(self):
        """Decode the extra field and apply any ZIP64 values it carries.

        Records with header id 1 replace file_size, compress_size and
        header_offset when those attributes hold the 0xffffffff placeholder.
        Raises RuntimeError when such a record has an unexpected length.
        """
        # Try to decode the extra field.
        extra = self.extra
        unpack = struct.unpack
        # Each record starts with a 4-byte (id, length) header; stop when
        # fewer than 4 bytes remain so trailing padding is ignored instead of
        # making struct.unpack fail.
        while len(extra) >= 4:
            tp, ln = unpack('<HH', extra[:4])
            if tp == 1:
                if ln >= 24:
                    counts = unpack('<QQQ', extra[4:28])
                elif ln == 16:
                    counts = unpack('<QQ', extra[4:20])
                elif ln == 8:
                    counts = unpack('<Q', extra[4:12])
                elif ln == 0:
                    counts = ()
                else:
                    raise RuntimeError("Corrupt extra field %s"%(ln,))

                idx = 0

                # ZIP64 extension (large files and/or large archives)
                if self.file_size in (0xffffffffffffffff, 0xffffffff):
                    self.file_size = counts[idx]
                    idx += 1

                if self.compress_size == 0xFFFFFFFF:
                    self.compress_size = counts[idx]
                    idx += 1

                if self.header_offset == 0xffffffff:
                    self.header_offset = counts[idx]
                    idx+=1

            extra = extra[ln+4:]
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000431
432
Thomas Wouterscf297e42007-02-23 15:07:44 +0000433class _ZipDecrypter:
434 """Class to handle decryption of files stored within a ZIP archive.
435
436 ZIP supports a password-based form of encryption. Even though known
437 plaintext attacks have been found against it, it is still useful
Christian Heimesfdab48e2008-01-20 09:06:41 +0000438 to be able to get data out of such a file.
Thomas Wouterscf297e42007-02-23 15:07:44 +0000439
440 Usage:
441 zd = _ZipDecrypter(mypwd)
442 plain_char = zd(cypher_char)
443 plain_text = map(zd, cypher_text)
444 """
445
446 def _GenerateCRCTable():
447 """Generate a CRC-32 table.
448
449 ZIP encryption uses the CRC32 one-byte primitive for scrambling some
450 internal keys. We noticed that a direct implementation is faster than
451 relying on binascii.crc32().
452 """
453 poly = 0xedb88320
454 table = [0] * 256
455 for i in range(256):
456 crc = i
457 for j in range(8):
458 if crc & 1:
459 crc = ((crc >> 1) & 0x7FFFFFFF) ^ poly
460 else:
461 crc = ((crc >> 1) & 0x7FFFFFFF)
462 table[i] = crc
463 return table
464 crctable = _GenerateCRCTable()
465
466 def _crc32(self, ch, crc):
467 """Compute the CRC32 primitive on one byte."""
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000468 return ((crc >> 8) & 0xffffff) ^ self.crctable[(crc ^ ch) & 0xff]
Thomas Wouterscf297e42007-02-23 15:07:44 +0000469
470 def __init__(self, pwd):
471 self.key0 = 305419896
472 self.key1 = 591751049
473 self.key2 = 878082192
474 for p in pwd:
475 self._UpdateKeys(p)
476
477 def _UpdateKeys(self, c):
478 self.key0 = self._crc32(c, self.key0)
479 self.key1 = (self.key1 + (self.key0 & 255)) & 4294967295
480 self.key1 = (self.key1 * 134775813 + 1) & 4294967295
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000481 self.key2 = self._crc32((self.key1 >> 24) & 255, self.key2)
Thomas Wouterscf297e42007-02-23 15:07:44 +0000482
483 def __call__(self, c):
484 """Decrypt a single character."""
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000485 assert isinstance(c, int)
Thomas Wouterscf297e42007-02-23 15:07:44 +0000486 k = self.key2 | 2
487 c = c ^ (((k * (k^1)) >> 8) & 255)
Thomas Wouterscf297e42007-02-23 15:07:44 +0000488 self._UpdateKeys(c)
489 return c
490
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200491
class LZMACompressor:
    """Raw LZMA1 compressor whose output starts with a small header.

    The header is struct '<BBH' (9, 4, len(props)) followed by the encoded
    filter properties; it is emitted with the first compress() or flush().
    """

    def __init__(self):
        # The underlying compressor is created lazily by _init().
        self._comp = None

    def _init(self):
        """Create the underlying compressor and return the header bytes."""
        props = lzma._encode_filter_properties({'id': lzma.FILTER_LZMA1})
        filter_spec = lzma._decode_filter_properties(lzma.FILTER_LZMA1, props)
        self._comp = lzma.LZMACompressor(lzma.FORMAT_RAW,
                                         filters=[filter_spec])
        return struct.pack('<BBH', 9, 4, len(props)) + props

    def compress(self, data):
        """Compress data, prefixing the header on the first call."""
        header = self._init() if self._comp is None else b''
        return header + self._comp.compress(data)

    def flush(self):
        """Finish the stream, prefixing the header if nothing was written."""
        header = self._init() if self._comp is None else b''
        return header + self._comp.flush()
513
514
class LZMADecompressor:
    """Decompressor for raw LZMA1 data preceded by a properties header.

    The first four bytes are a header whose bytes 2-3 give the length of the
    filter properties that follow; the compressed stream comes after them.
    """

    def __init__(self):
        self._decomp = None     # created once the full header has arrived
        self._unconsumed = b''  # input buffered while the header is incomplete
        self.eof = False

    def decompress(self, data):
        """Feed data and return whatever decompressed bytes are available.

        Returns b'' until more than the header plus properties has been
        received.
        """
        if self._decomp is None:
            pending = self._unconsumed + data
            self._unconsumed = pending
            if len(pending) <= 4:
                return b''
            psize, = struct.unpack('<H', pending[2:4])
            body_start = 4 + psize
            if len(pending) <= body_start:
                return b''

            filter_spec = lzma._decode_filter_properties(
                lzma.FILTER_LZMA1, pending[4:body_start])
            self._decomp = lzma.LZMADecompressor(lzma.FORMAT_RAW,
                                                 filters=[filter_spec])
            data = pending[body_start:]
            del self._unconsumed

        result = self._decomp.decompress(data)
        self.eof = self._decomp.eof
        return result
541
542
# Names of ZIP compression method codes, used when reporting a method that
# cannot be decompressed.
compressor_names = {
    0: 'store', 1: 'shrink',
    2: 'reduce', 3: 'reduce', 4: 'reduce', 5: 'reduce',
    6: 'implode', 7: 'tokenize',
    8: 'deflate', 9: 'deflate64', 10: 'implode',
    12: 'bzip2', 14: 'lzma',
    18: 'terse', 19: 'lz77',
    97: 'wavpack', 98: 'ppmd',
}
562
def _check_compression(compression):
    """Verify that the given compression method can be used.

    Raises RuntimeError when the method is unknown or when the module that
    implements it could not be imported.  Returns None otherwise.
    """
    # (method code, implementing module or placeholder, error if unavailable)
    requirements = (
        (ZIP_STORED, True, None),
        (ZIP_DEFLATED, zlib,
         "Compression requires the (missing) zlib module"),
        (ZIP_BZIP2, bz2,
         "Compression requires the (missing) bz2 module"),
        (ZIP_LZMA, lzma,
         "Compression requires the (missing) lzma module"),
    )
    for method, module, complaint in requirements:
        if compression == method:
            if not module:
                raise RuntimeError(complaint)
            return
    raise RuntimeError("That compression method is not supported")
580
581
def _get_compressor(compress_type):
    """Return a new compressor object for compress_type.

    None is returned for any type other than deflate, bzip2 and LZMA.
    """
    if compress_type == ZIP_DEFLATED:
        return zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                                zlib.DEFLATED, -15)
    if compress_type == ZIP_BZIP2:
        return bz2.BZ2Compressor()
    if compress_type == ZIP_LZMA:
        return LZMACompressor()
    return None
592
593
def _get_decompressor(compress_type):
    """Return a new decompressor object for compress_type.

    Returns None for stored (uncompressed) entries.  Raises
    NotImplementedError for any other method that is not handled here,
    naming the method when it is listed in compressor_names.
    """
    if compress_type == ZIP_STORED:
        return None
    if compress_type == ZIP_DEFLATED:
        return zlib.decompressobj(-15)
    if compress_type == ZIP_BZIP2:
        return bz2.BZ2Decompressor()
    if compress_type == ZIP_LZMA:
        return LZMADecompressor()

    descr = compressor_names.get(compress_type)
    if descr:
        raise NotImplementedError("compression type %d (%s)" % (compress_type, descr))
    raise NotImplementedError("compression type %d" % (compress_type,))
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200609
610
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000611class ZipExtFile(io.BufferedIOBase):
Guido van Rossumd8faa362007-04-27 19:54:29 +0000612 """File-like object for reading an archive member.
613 Is returned by ZipFile.open().
614 """
615
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000616 # Max size supported by decompressor.
617 MAX_N = 1 << 31 - 1
Guido van Rossumd8faa362007-04-27 19:54:29 +0000618
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000619 # Read from compressed files in 4k blocks.
620 MIN_READ_SIZE = 4096
Guido van Rossumd8faa362007-04-27 19:54:29 +0000621
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000622 # Search for universal newlines or line chunks.
623 PATTERN = re.compile(br'^(?P<chunk>[^\r\n]+)|(?P<newline>\n|\r\n?)')
624
Łukasz Langae94980a2010-11-22 23:31:26 +0000625 def __init__(self, fileobj, mode, zipinfo, decrypter=None,
626 close_fileobj=False):
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000627 self._fileobj = fileobj
628 self._decrypter = decrypter
Łukasz Langae94980a2010-11-22 23:31:26 +0000629 self._close_fileobj = close_fileobj
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000630
Ezio Melotti92b47432010-01-28 01:44:41 +0000631 self._compress_type = zipinfo.compress_type
Ezio Melotti92b47432010-01-28 01:44:41 +0000632 self._compress_left = zipinfo.compress_size
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200633 self._left = zipinfo.file_size
Ezio Melotti92b47432010-01-28 01:44:41 +0000634
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200635 self._decompressor = _get_decompressor(self._compress_type)
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000636
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200637 self._eof = False
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000638 self._readbuffer = b''
639 self._offset = 0
640
641 self._universal = 'U' in mode
642 self.newlines = None
643
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000644 # Adjust read size for encrypted files since the first 12 bytes
645 # are for the encryption/password information.
646 if self._decrypter is not None:
647 self._compress_left -= 12
648
649 self.mode = mode
Guido van Rossumd8faa362007-04-27 19:54:29 +0000650 self.name = zipinfo.filename
651
Antoine Pitrou7c8bcb62010-08-12 15:11:50 +0000652 if hasattr(zipinfo, 'CRC'):
653 self._expected_crc = zipinfo.CRC
654 self._running_crc = crc32(b'') & 0xffffffff
655 else:
656 self._expected_crc = None
657
def readline(self, limit=-1):
    """Read and return a line from the stream.

    If limit is specified, at most limit bytes will be read.
    In universal-newline mode every line terminator found is recorded in
    self.newlines and the returned line always ends with b'\\n'.
    """
    if not self._universal:
        if limit < 0:
            # Fast path: a complete line is already sitting in the buffer.
            end = self._readbuffer.find(b'\n', self._offset) + 1
            if end > 0:
                start, self._offset = self._offset, end
                return self._readbuffer[start:end]
        return io.BufferedIOBase.readline(self, limit)

    unlimited = limit < 0
    collected = b''
    while unlimited or len(collected) < limit:
        ahead = self.peek(2)
        if not ahead:
            # Nothing left to read: hand back whatever was gathered.
            break

        # PATTERN yields either a run of ordinary bytes or one newline
        # sequence, never both.  Peeking two bytes keeps '\r\n' from being
        # split into two separate newlines.
        found = self.PATTERN.search(ahead)
        terminator = found.group('newline')
        if terminator is None:
            piece = found.group('chunk')
            if not unlimited:
                piece = piece[:limit - len(collected)]
            self._offset += len(piece)
            collected += piece
            continue

        if self.newlines is None:
            self.newlines = []
        if terminator not in self.newlines:
            self.newlines.append(terminator)
        self._offset += len(terminator)
        return collected + b'\n'

    return collected
707
def peek(self, n=1):
    """Returns buffered bytes without advancing the position."""
    buffered = len(self._readbuffer) - self._offset
    if n > buffered:
        # read() consumes the data, so push it back in front of whatever
        # is still buffered (or simply rewind if it all fits before the
        # current offset).
        fetched = self.read(n)
        if len(fetched) <= self._offset:
            self._offset -= len(fetched)
        else:
            self._readbuffer = fetched + self._readbuffer[self._offset:]
            self._offset = 0

    # Return up to 512 bytes to reduce allocation overhead for tight loops.
    start = self._offset
    return self._readbuffer[start:start + 512]
720
def readable(self):
    """Report that the stream can be read from; always True."""
    return True
723
def read(self, n=-1):
    """Read and return up to n bytes.

    If the argument is omitted, None, or negative, data is read and
    returned until EOF is reached.
    """
    read_all = n is None or n < 0
    if not read_all:
        stop = self._offset + n
        if stop < len(self._readbuffer):
            # The request is satisfied entirely from the buffer.
            start, self._offset = self._offset, stop
            return self._readbuffer[start:stop]
        missing = stop - len(self._readbuffer)

    # Drain whatever is buffered, then pull more through _read1().
    result = self._readbuffer[self._offset:]
    self._readbuffer = b''
    self._offset = 0

    if read_all:
        while not self._eof:
            result += self._read1(self.MAX_N)
        return result

    while missing > 0 and not self._eof:
        block = self._read1(missing)
        if len(block) > missing:
            # _read1() returned more than requested: keep the surplus
            # buffered for the next call.
            self._readbuffer = block
            self._offset = missing
            return result + block[:missing]
        result += block
        missing -= len(block)
    return result
756
def _update_crc(self, newdata):
    """Fold newdata into the running CRC-32 and verify it at end of file.

    Does nothing when there is no expected CRC to compare against.
    Raises BadZipFile if EOF has been reached and the CRCs differ.
    """
    expected = self._expected_crc
    if expected is None:
        # No reference value, so computing the CRC would be pointless.
        return
    running = crc32(newdata, self._running_crc) & 0xffffffff
    self._running_crc = running
    # The comparison is only meaningful once the whole member was read.
    if self._eof and running != expected:
        raise BadZipFile("Bad CRC-32 for file %r" % self.name)
Antoine Pitrou7c8bcb62010-08-12 15:11:50 +0000766
def read1(self, n):
    """Read up to n bytes with at most one read() system call."""
    if n is None or n < 0:
        # Hand back everything buffered plus one _read1() worth of data.
        pending = self._readbuffer[self._offset:]
        self._readbuffer = b''
        self._offset = 0
        return pending + self._read1(self.MAX_N)

    stop = self._offset + n
    if stop < len(self._readbuffer):
        # Served entirely from the buffer.
        start, self._offset = self._offset, stop
        return self._readbuffer[start:stop]

    missing = stop - len(self._readbuffer)
    pending = self._readbuffer[self._offset:]
    self._readbuffer = b''
    self._offset = 0
    if missing <= 0:
        return pending

    fresh = self._read1(missing)
    if len(fresh) > missing:
        # Keep the surplus buffered for the next call.
        self._readbuffer = fresh
        self._offset = missing
        fresh = fresh[:missing]
    return pending + fresh
796
def _read1(self, n):
    """Read up to n compressed bytes with at most one read() system call,
    decrypt and decompress them.

    Updates self._eof, self._left and the running CRC as a side effect.
    """
    if self._eof or n <= 0:
        return b''

    method = self._compress_type
    if method == ZIP_DEFLATED:
        # Feed the decompressor its own leftover input first, topping it
        # up from the file only when that is not enough.
        raw = self._decompressor.unconsumed_tail
        if n > len(raw):
            raw += self._read2(n - len(raw))
        budget = max(n, self.MIN_READ_SIZE)
        out = self._decompressor.decompress(raw, budget)
        self._eof = (self._decompressor.eof or
                     (self._compress_left <= 0 and
                      not self._decompressor.unconsumed_tail))
        if self._eof:
            out += self._decompressor.flush()
    elif method == ZIP_STORED:
        out = self._read2(n)
        self._eof = self._compress_left <= 0
    else:
        out = self._decompressor.decompress(self._read2(n))
        self._eof = self._decompressor.eof or self._compress_left <= 0

    # Never return more than the member's remaining uncompressed size.
    out = out[:self._left]
    self._left -= len(out)
    if self._left <= 0:
        self._eof = True
    self._update_crc(out)
    return out
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000832
def _read2(self, n):
    """Read raw member bytes from the underlying file and decrypt them.

    At least MIN_READ_SIZE bytes are requested, but never more than the
    compressed bytes still outstanding for this member.
    """
    remaining = self._compress_left
    if remaining <= 0:
        return b''

    want = min(max(n, self.MIN_READ_SIZE), remaining)
    raw = self._fileobj.read(want)
    self._compress_left -= len(raw)

    if self._decrypter is None:
        return raw
    return bytes(map(self._decrypter, raw))
Guido van Rossumd8faa362007-04-27 19:54:29 +0000846
def close(self):
    """Close the stream, and the underlying file too when this object
    owns it (self._close_fileobj)."""
    owns_file = self._close_fileobj
    try:
        if owns_file:
            self._fileobj.close()
    finally:
        # Always mark the stream itself closed, even if closing the
        # underlying file raised.
        super().close()
Guido van Rossumd8faa362007-04-27 19:54:29 +0000853
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000854
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000855class ZipFile:
Tim Petersa19a1682001-03-29 04:36:09 +0000856 """ Class with methods to open, read, write, close, list zip files.
857
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000858 z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=False)
Tim Petersa19a1682001-03-29 04:36:09 +0000859
Fred Drake3d9091e2001-03-26 15:49:24 +0000860 file: Either the path to the file, or a file-like object.
861 If it is a path, the file will be opened and closed by ZipFile.
862 mode: The mode can be either read "r", write "w" or append "a".
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +0200863 compression: ZIP_STORED (no compression), ZIP_DEFLATED (requires zlib),
864 ZIP_BZIP2 (requires bz2) or ZIP_LZMA (requires lzma).
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000865 allowZip64: if True ZipFile will create files with ZIP64 extensions when
866 needed, otherwise it will raise an exception when this would
867 be necessary.
868
Fred Drake3d9091e2001-03-26 15:49:24 +0000869 """
Fred Drake484d7352000-10-02 21:14:52 +0000870
Fred Drake90eac282001-02-28 05:29:34 +0000871 fp = None # Set here since __del__ checks it
872
def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=False):
    """Open the ZIP file with mode read "r", write "w" or append "a"."""
    if mode not in ("r", "w", "a"):
        raise RuntimeError('ZipFile() requires mode "r", "w", or "a"')

    _check_compression(compression)

    self._allowZip64 = allowZip64
    self._didModify = False
    self.debug = 0                      # Level of printing: 0 through 3
    self.NameToInfo = {}                # Find file info given name
    self.filelist = []                  # List of ZipInfo instances for archive
    self.compression = compression      # Method of compression
    self.mode = key = mode.replace('b', '')[0]
    self.pwd = None
    self._comment = b''

    if not isinstance(file, str):
        # A file-like object was handed in; the caller keeps ownership.
        self._filePassed = 1
        self.fp = file
        self.filename = getattr(file, 'name', None)
    else:
        # A path was given: open it ourselves.
        self._filePassed = 0
        self.filename = file
        open_modes = {'r': 'rb', 'w': 'wb', 'a': 'r+b'}
        try:
            self.fp = io.open(file, open_modes[mode])
        except OSError:
            if mode != 'a':
                raise
            # Appending to a file that cannot be opened for update:
            # fall back to creating it.
            mode = key = 'w'
            self.fp = io.open(file, open_modes[mode])

    try:
        if key == 'r':
            self._RealGetContents()
        elif key == 'w':
            # set the modified flag so central directory gets written
            # even if no files are added to the archive
            self._didModify = True
        elif key == 'a':
            try:
                # See if file is a zip file
                self._RealGetContents()
                # seek to start of directory and overwrite
                self.fp.seek(self.start_dir, 0)
            except BadZipFile:
                # file is not a zip file, just append
                self.fp.seek(0, 2)

                # set the modified flag so central directory gets written
                # even if no files are added to the archive
                self._didModify = True
        else:
            raise RuntimeError('Mode must be "r", "w" or "a"')
    except:
        # Deliberately catches everything: release the file we opened
        # before re-raising, and leave self.fp cleared.
        opened = self.fp
        self.fp = None
        if not self._filePassed:
            opened.close()
        raise
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000937
def __enter__(self):
    """Enter a ``with`` block; the archive object itself is the target."""
    return self
940
def __exit__(self, type, value, traceback):
    """Leave a ``with`` block by closing the archive.

    Exception details are ignored; nothing is returned, so an exception
    raised in the block is not suppressed.
    """
    self.close()
943
def _RealGetContents(self):
    """Read in the table of contents for the ZIP file.

    Fills self.filelist and self.NameToInfo with one ZipInfo per central
    directory record, and sets self._comment and self.start_dir.

    Raises BadZipFile if the end-of-central-directory record cannot be
    found, or if a central directory record has a bad signature or is
    truncated.  Raises NotImplementedError if a member needs a newer ZIP
    version than MAX_EXTRACT_VERSION.
    """
    fp = self.fp
    try:
        endrec = _EndRecData(fp)
    except OSError:
        raise BadZipFile("File is not a zip file")
    if not endrec:
        raise BadZipFile("File is not a zip file")
    if self.debug > 1:
        print(endrec)
    size_cd = endrec[_ECD_SIZE]             # bytes in central directory
    offset_cd = endrec[_ECD_OFFSET]         # offset of central directory
    self._comment = endrec[_ECD_COMMENT]    # archive comment

    # "concat" is zero, unless zip was concatenated to another file
    concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
    if endrec[_ECD_SIGNATURE] == stringEndArchive64:
        # If Zip64 extension structures are present, account for them
        concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)

    if self.debug > 2:
        inferred = concat + offset_cd
        print("given, inferred, offset", offset_cd, inferred, concat)
    # self.start_dir:  Position of start of central directory
    self.start_dir = offset_cd + concat
    fp.seek(self.start_dir, 0)
    data = fp.read(size_cd)
    # Parse the directory from memory rather than from the file.
    fp = io.BytesIO(data)
    total = 0
    while total < size_cd:
        centdir = fp.read(sizeCentralDir)
        if centdir[0:4] != stringCentralDir:
            raise BadZipFile("Bad magic number for central directory")
        if len(centdir) != sizeCentralDir:
            # A short record would otherwise escape as struct.error, which
            # callers that only catch BadZipFile do not expect.
            raise BadZipFile("Truncated central directory")
        centdir = struct.unpack(structCentralDir, centdir)
        if self.debug > 2:
            print(centdir)
        filename = fp.read(centdir[_CD_FILENAME_LENGTH])
        flags = centdir[5]
        if flags & 0x800:
            # UTF-8 file names extension
            filename = filename.decode('utf-8')
        else:
            # Historical ZIP filename encoding
            filename = filename.decode('cp437')
        # Create ZipInfo instance to store file information
        x = ZipInfo(filename)
        x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
        x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
        x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
        (x.create_version, x.create_system, x.extract_version, x.reserved,
         x.flag_bits, x.compress_type, t, d,
         x.CRC, x.compress_size, x.file_size) = centdir[1:12]
        if x.extract_version > MAX_EXTRACT_VERSION:
            raise NotImplementedError("zip file version %.1f" %
                                      (x.extract_version / 10))
        x.volume, x.internal_attr, x.external_attr = centdir[15:18]
        # Convert date/time code to (year, month, day, hour, min, sec)
        x._raw_time = t
        x.date_time = ((d >> 9) + 1980, (d >> 5) & 0xF, d & 0x1F,
                       t >> 11, (t >> 5) & 0x3F, (t & 0x1F) * 2)

        x._decodeExtra()
        x.header_offset = x.header_offset + concat
        self.filelist.append(x)
        self.NameToInfo[x.filename] = x

        # update total bytes read from central directory
        total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH]
                 + centdir[_CD_EXTRA_FIELD_LENGTH]
                 + centdir[_CD_COMMENT_LENGTH])

        if self.debug > 2:
            print("total", total)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001018
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001019
def namelist(self):
    """Return a list of file names in the archive."""
    names = []
    for entry in self.filelist:
        names.append(entry.filename)
    return names
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001023
def infolist(self):
    """Return a list of class ZipInfo instances for files in the
    archive.

    The archive's own list object is returned, not a copy.
    """
    entries = self.filelist
    return entries
1028
def printdir(self, file=None):
    """Print a table of contents for the zip file.

    Output goes to `file' (passed straight to print()).
    """
    header = "%-46s %19s %12s" % ("File Name", "Modified ", "Size")
    print(header, file=file)
    for entry in self.filelist:
        stamp = "%d-%02d-%02d %02d:%02d:%02d" % entry.date_time[:6]
        row = "%-46s %s %12d" % (entry.filename, stamp, entry.file_size)
        print(row, file=file)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001037
def testzip(self):
    """Read all the files and check the CRC.

    Returns the name of the first member whose read raises BadZipFile,
    or None if every member reads cleanly.
    """
    chunk_size = 2 ** 20
    for entry in self.filelist:
        try:
            # Read by chunks, to avoid an OverflowError or a
            # MemoryError with very large embedded files.
            with self.open(entry.filename, "r") as member:
                while True:
                    if not member.read(chunk_size):     # Check CRC-32
                        break
        except BadZipFile:
            return entry.filename
    return None
1050
def getinfo(self, name):
    """Return the instance of ZipInfo given 'name'.

    Raises KeyError if the archive has no member with that name.
    """
    found = self.NameToInfo.get(name)
    if found is not None:
        return found
    raise KeyError(
        'There is no item named %r in the archive' % name)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001059
def setpassword(self, pwd):
    """Set default password for encrypted files.

    A false value (None or b'') clears the default password.  Raises
    TypeError if a non-empty pwd is not a bytes object.
    """
    if pwd and not isinstance(pwd, bytes):
        raise TypeError("pwd: expected bytes, got %s" % type(pwd))
    self.pwd = pwd if pwd else None
Thomas Wouterscf297e42007-02-23 15:07:44 +00001068
@property
def comment(self):
    """The comment text associated with the ZIP file."""
    return self._comment

@comment.setter
def comment(self, comment):
    """Set the archive comment and mark the archive as modified.

    Raises TypeError if comment is not a bytes object.  A comment longer
    than ZIP_MAX_COMMENT bytes is truncated to that length (with a
    message when self.debug is set).
    """
    if not isinstance(comment, bytes):
        raise TypeError("comment: expected bytes, got %s" % type(comment))
    # check for valid comment length; a comment of exactly ZIP_MAX_COMMENT
    # bytes still fits and must not be reported as too long
    if len(comment) > ZIP_MAX_COMMENT:
        if self.debug:
            print('Archive comment is too long; truncating to %d bytes'
                  % ZIP_MAX_COMMENT)
        comment = comment[:ZIP_MAX_COMMENT]
    self._comment = comment
    self._didModify = True
1086
def read(self, name, pwd=None):
    """Return file bytes (as a string) for name.

    The member is opened with self.open(name, "r", pwd), read to the end
    and closed again.
    """
    member = self.open(name, "r", pwd)
    with member:
        payload = member.read()
    return payload
Guido van Rossumd8faa362007-04-27 19:54:29 +00001091
def open(self, name, mode="r", pwd=None):
    """Return file-like object for 'name'.

    'name' is a member name or a ZipInfo instance, 'mode' is one of "r",
    "U" or "rU", and 'pwd' is the bytes password for an encrypted member
    (self.pwd is used when it is not given).

    Raises RuntimeError for a bad mode, a closed archive, a missing
    password or a wrong password; TypeError if pwd is not bytes;
    BadZipFile if the local file header is corrupt, truncated, or names a
    different file than the central directory; NotImplementedError for
    compressed patched data or strong encryption.
    """
    if mode not in ("r", "U", "rU"):
        raise RuntimeError('open() requires mode "r", "U", or "rU"')
    if pwd and not isinstance(pwd, bytes):
        raise TypeError("pwd: expected bytes, got %s" % type(pwd))
    if not self.fp:
        raise RuntimeError(
            "Attempt to read ZIP archive that was already closed")

    # Only open a new file for instances where we were not
    # given a file object in the constructor
    if self._filePassed:
        zef_file = self.fp
    else:
        zef_file = io.open(self.filename, 'rb')

    try:
        # Make sure we have an info object
        if isinstance(name, ZipInfo):
            # 'name' is already an info object
            zinfo = name
        else:
            # Get info object for name
            zinfo = self.getinfo(name)
        zef_file.seek(zinfo.header_offset, 0)

        # Skip the file header:
        fheader = zef_file.read(sizeFileHeader)
        if fheader[0:4] != stringFileHeader:
            raise BadZipFile("Bad magic number for file header")
        if len(fheader) != sizeFileHeader:
            # A short header would otherwise surface as struct.error
            # instead of the documented BadZipFile.
            raise BadZipFile("Truncated file header")

        fheader = struct.unpack(structFileHeader, fheader)
        fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
        if fheader[_FH_EXTRA_FIELD_LENGTH]:
            zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])

        if zinfo.flag_bits & 0x20:
            # Zip 2.7: compressed patched data
            raise NotImplementedError("compressed patched data (flag bit 5)")

        if zinfo.flag_bits & 0x40:
            # strong encryption
            raise NotImplementedError("strong encryption (flag bit 6)")

        if zinfo.flag_bits & 0x800:
            # UTF-8 filename
            fname_str = fname.decode("utf-8")
        else:
            fname_str = fname.decode("cp437")

        if fname_str != zinfo.orig_filename:
            raise BadZipFile(
                'File name in directory %r and header %r differ.'
                % (zinfo.orig_filename, fname))

        # check for encrypted flag & handle password
        is_encrypted = zinfo.flag_bits & 0x1
        zd = None
        if is_encrypted:
            if not pwd:
                pwd = self.pwd
            if not pwd:
                raise RuntimeError("File %s is encrypted, password "
                                   "required for extraction" % name)

            zd = _ZipDecrypter(pwd)
            # The first 12 bytes in the cypher stream is an encryption header
            #  used to strengthen the algorithm. The first 11 bytes are
            #  completely random, while the 12th contains the MSB of the CRC,
            #  or the MSB of the file time depending on the header type
            #  and is used to check the correctness of the password.
            header = zef_file.read(12)
            h = list(map(zd, header[0:12]))
            if zinfo.flag_bits & 0x8:
                # compare against the file type from extended local headers
                check_byte = (zinfo._raw_time >> 8) & 0xff
            else:
                # compare against the CRC otherwise
                check_byte = (zinfo.CRC >> 24) & 0xff
            if h[11] != check_byte:
                raise RuntimeError("Bad password for file", name)

        return ZipExtFile(zef_file, mode, zinfo, zd,
                          close_fileobj=not self._filePassed)
    except:
        # Deliberately catches everything: a file we opened ourselves must
        # not leak, whatever went wrong; the error is always re-raised.
        if not self._filePassed:
            zef_file.close()
        raise
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001181
def extract(self, member, path=None, pwd=None):
    """Extract a member from the archive to the current working directory,
    using its full name. Its file information is extracted as accurately
    as possible. `member' may be a filename or a ZipInfo object. You can
    specify a different directory using `path'.

    Returns whatever self._extract_member() returns.
    """
    info = member if isinstance(member, ZipInfo) else self.getinfo(member)
    destination = os.getcwd() if path is None else path
    return self._extract_member(info, destination, pwd)
1195
def extractall(self, path=None, members=None, pwd=None):
    """Extract all members from the archive to the current working
    directory. `path' specifies a different directory to extract to.
    `members' is optional and must be a subset of the list returned
    by namelist().
    """
    targets = self.namelist() if members is None else members
    for entry in targets:
        self.extract(entry, path, pwd)
1207
def _extract_member(self, member, targetpath, pwd):
    """Extract the ZipInfo object 'member' to a physical
    file on the path targetpath.

    The member name comes from the archive and is therefore untrusted:
    it is always interpreted relative to targetpath.  A drive prefix,
    leading separators, empty components, "." and ".." are dropped so
    the result cannot land outside targetpath.

    Returns the normalized path that was written (or, for a directory
    entry, created).
    """
    # Strip trailing path separator, unless it represents the root.
    if (targetpath[-1:] in (os.path.sep, os.path.altsep)
            and len(os.path.splitdrive(targetpath)[1]) > 1):
        targetpath = targetpath[:-1]

    # Build the destination pathname, replacing forward slashes with the
    # platform specific separator.
    arcname = member.filename.replace('/', os.path.sep)
    if os.path.altsep:
        arcname = arcname.replace(os.path.altsep, os.path.sep)
    # Interpret an absolute pathname as relative; remove drive letter,
    # redundant separators, "." and ".." components.
    arcname = os.path.splitdrive(arcname)[1]
    unsafe_parts = ('', os.path.curdir, os.path.pardir)
    arcname = os.path.sep.join(part for part in arcname.split(os.path.sep)
                               if part not in unsafe_parts)

    targetpath = os.path.normpath(os.path.join(targetpath, arcname))

    # Create all upper directories if necessary.
    upperdirs = os.path.dirname(targetpath)
    if upperdirs and not os.path.exists(upperdirs):
        os.makedirs(upperdirs)

    # endswith() rather than [-1] so an empty member name cannot raise
    # IndexError.
    if member.filename.endswith('/'):
        if not os.path.isdir(targetpath):
            os.mkdir(targetpath)
        return targetpath

    with self.open(member, pwd=pwd) as source, \
            open(targetpath, "wb") as target:
        shutil.copyfileobj(source, target)

    return targetpath
1242
def _writecheck(self, zinfo):
    """Check for errors before writing a file to the archive.

    Raises RuntimeError if the archive is not writable or already closed,
    and LargeZipFile if the member's size or header offset exceeds
    ZIP64_LIMIT while ZIP64 extensions are not allowed.  A duplicate name
    only produces a message when self.debug is set.
    """
    if zinfo.filename in self.NameToInfo and self.debug:
        # Warning for duplicate names
        print("Duplicate name:", zinfo.filename)
    if self.mode not in ("w", "a"):
        raise RuntimeError('write() requires mode "w" or "a"')
    if not self.fp:
        raise RuntimeError(
            "Attempt to write ZIP archive that was already closed")
    _check_compression(zinfo.compress_type)
    if zinfo.file_size > ZIP64_LIMIT and not self._allowZip64:
        raise LargeZipFile("Filesize would require ZIP64 extensions")
    if zinfo.header_offset > ZIP64_LIMIT and not self._allowZip64:
        raise LargeZipFile(
            "Zipfile size would require ZIP64 extensions")
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001261
def write(self, filename, arcname=None, compress_type=None):
    """Put the bytes from filename into the archive under the name
    arcname.

    When arcname is omitted the file's own path is used, with any drive
    and leading separators removed.  compress_type overrides the
    archive's default compression for this member.  A directory is
    stored as an empty entry whose name ends in '/'.

    Raises RuntimeError if the archive has already been closed.
    """
    if not self.fp:
        raise RuntimeError(
            "Attempt to write to ZIP archive that was already closed")

    st = os.stat(filename)
    isdir = stat.S_ISDIR(st.st_mode)
    date_time = time.localtime(st.st_mtime)[0:6]

    # Create ZipInfo instance to store file information
    if arcname is None:
        arcname = filename
    arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
    while arcname[0] in (os.sep, os.altsep):
        arcname = arcname[1:]
    if isdir:
        arcname += '/'
    zinfo = ZipInfo(arcname, date_time)
    zinfo.external_attr = (st.st_mode & 0xFFFF) << 16      # Unix attributes
    zinfo.compress_type = (self.compression if compress_type is None
                           else compress_type)

    zinfo.file_size = st.st_size
    zinfo.flag_bits = 0x00
    zinfo.header_offset = self.fp.tell()    # Start of header bytes
    if zinfo.compress_type == ZIP_LZMA:
        # Compressed data includes an end-of-stream (EOS) marker
        zinfo.flag_bits |= 0x02

    self._writecheck(zinfo)
    self._didModify = True

    if isdir:
        # Directories carry no data: register the entry and write only
        # its header.
        zinfo.file_size = 0
        zinfo.compress_size = 0
        zinfo.CRC = 0
        self.filelist.append(zinfo)
        self.NameToInfo[zinfo.filename] = zinfo
        self.fp.write(zinfo.FileHeader())
        return

    cmpr = _get_compressor(zinfo.compress_type)
    with open(filename, "rb") as src:
        # Must overwrite CRC and sizes with correct data later
        zinfo.CRC = CRC = 0
        zinfo.compress_size = compress_size = 0
        zinfo.file_size = file_size = 0
        self.fp.write(zinfo.FileHeader())
        while True:
            buf = src.read(1024 * 8)
            if not buf:
                break
            file_size += len(buf)
            CRC = crc32(buf, CRC) & 0xffffffff
            if cmpr:
                buf = cmpr.compress(buf)
                compress_size += len(buf)
            self.fp.write(buf)

    if cmpr:
        # Emit whatever the compressor is still holding back.
        tail = cmpr.flush()
        compress_size += len(tail)
        self.fp.write(tail)
        zinfo.compress_size = compress_size
    else:
        zinfo.compress_size = file_size
    zinfo.CRC = CRC
    zinfo.file_size = file_size

    # Seek backwards and write CRC and file sizes
    position = self.fp.tell()       # Preserve current position in file
    self.fp.seek(zinfo.header_offset + 14, 0)
    self.fp.write(struct.pack("<LLL", zinfo.CRC, zinfo.compress_size,
                              zinfo.file_size))
    self.fp.seek(position, 0)
    self.filelist.append(zinfo)
    self.NameToInfo[zinfo.filename] = zinfo
1341
def writestr(self, zinfo_or_arcname, data, compress_type=None):
    """Write 'data' into the archive as a single member.

    'zinfo_or_arcname' is either a ZipInfo instance, which is updated in
    place and recorded as the member's entry, or the name the member
    should have in the archive.  In the latter case a new ZipInfo is
    created, stamped with the current local time, using the archive's
    default compression and external_attr set to 0o600 << 16.

    'data' may be either a 'str' or a 'bytes' instance; a 'str' is
    encoded as UTF-8 first.

    'compress_type', when not None, overrides the compression method of
    the entry, including the one carried by a passed-in ZipInfo.

    Raises RuntimeError if the archive has already been closed.
    """
    if isinstance(data, str):
        data = data.encode("utf-8")
    if not isinstance(zinfo_or_arcname, ZipInfo):
        zinfo = ZipInfo(filename=zinfo_or_arcname,
                        date_time=time.localtime(time.time())[:6])
        zinfo.compress_type = self.compression
        zinfo.external_attr = 0o600 << 16
    else:
        zinfo = zinfo_or_arcname

    if not self.fp:
        raise RuntimeError(
            "Attempt to write to ZIP archive that was already closed")

    zinfo.file_size = len(data)            # Uncompressed size
    zinfo.header_offset = self.fp.tell()   # Start of header data
    if compress_type is not None:
        zinfo.compress_type = compress_type
    if zinfo.compress_type == ZIP_LZMA:
        # Compressed data includes an end-of-stream (EOS) marker
        zinfo.flag_bits |= 0x02

    self._writecheck(zinfo)
    self._didModify = True
    zinfo.CRC = crc32(data) & 0xffffffff   # CRC-32 checksum
    co = _get_compressor(zinfo.compress_type)
    if co:
        data = co.compress(data) + co.flush()
        zinfo.compress_size = len(data)    # Compressed size
    else:
        zinfo.compress_size = zinfo.file_size
    self.fp.write(zinfo.FileHeader())
    self.fp.write(data)
    if zinfo.flag_bits & 0x08:
        # Write CRC and file sizes after the file data
        self.fp.write(struct.pack("<LLL", zinfo.CRC, zinfo.compress_size,
                                  zinfo.file_size))
    # Flush only after the last byte belonging to this member has been
    # written, so the trailing data descriptor is not left in the buffer.
    self.fp.flush()
    self.filelist.append(zinfo)
    self.NameToInfo[zinfo.filename] = zinfo
1388
def __del__(self):
    """Finalizer: run close() in case the caller never did."""
    self.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001392
def close(self):
    """Close the archive, writing the ending records when needed.

    For an archive opened in mode "w" or "a" that has been modified, the
    central directory and the end-of-archive record(s) are written before
    the file is released.  The underlying file object is closed unless
    self._filePassed is set.  Nothing happens if self.fp is already None.
    """
    if self.fp is None:
        return

    try:
        if self.mode in ("w", "a") and self._didModify:
            # ---- central directory: one record per member ----
            entry_count = 0
            dir_start = self.fp.tell()
            for zinfo in self.filelist:
                entry_count += 1

                # Pack the timestamp into the 16-bit DOS date/time fields.
                stamp = zinfo.date_time
                dosdate = (stamp[0] - 1980) << 9 | stamp[1] << 5 | stamp[2]
                dostime = stamp[3] << 11 | stamp[4] << 5 | (stamp[5] // 2)

                # Values too large for the 32-bit fields are moved into a
                # ZIP64 extra field and replaced by the 0xffffffff marker.
                zip64_fields = []
                if (zinfo.file_size > ZIP64_LIMIT
                        or zinfo.compress_size > ZIP64_LIMIT):
                    zip64_fields += [zinfo.file_size, zinfo.compress_size]
                    file_size = compress_size = 0xffffffff
                else:
                    file_size = zinfo.file_size
                    compress_size = zinfo.compress_size

                if zinfo.header_offset > ZIP64_LIMIT:
                    zip64_fields.append(zinfo.header_offset)
                    header_offset = 0xffffffff
                else:
                    header_offset = zinfo.header_offset

                extra_data = zinfo.extra
                min_version = 0
                if zip64_fields:
                    # Prepend a ZIP64 field to the member's extra data
                    zip64_extra = struct.pack(
                        '<HH' + 'Q' * len(zip64_fields),
                        1, 8 * len(zip64_fields), *zip64_fields)
                    extra_data = zip64_extra + extra_data
                    min_version = ZIP64_VERSION

                if zinfo.compress_type == ZIP_BZIP2:
                    min_version = max(BZIP2_VERSION, min_version)
                elif zinfo.compress_type == ZIP_LZMA:
                    min_version = max(LZMA_VERSION, min_version)

                extract_version = max(min_version, zinfo.extract_version)
                create_version = max(min_version, zinfo.create_version)
                try:
                    filename, flag_bits = zinfo._encodeFilenameFlags()
                    centdir = struct.pack(
                        structCentralDir,
                        stringCentralDir, create_version,
                        zinfo.create_system, extract_version,
                        zinfo.reserved, flag_bits, zinfo.compress_type,
                        dostime, dosdate, zinfo.CRC, compress_size,
                        file_size, len(filename), len(extra_data),
                        len(zinfo.comment), 0, zinfo.internal_attr,
                        zinfo.external_attr, header_offset)
                except DeprecationWarning:
                    # Dump the offending values before re-raising.
                    print((structCentralDir, stringCentralDir,
                           create_version, zinfo.create_system,
                           extract_version, zinfo.reserved,
                           zinfo.flag_bits, zinfo.compress_type,
                           dostime, dosdate, zinfo.CRC, compress_size,
                           file_size, len(zinfo.filename),
                           len(extra_data), len(zinfo.comment), 0,
                           zinfo.internal_attr, zinfo.external_attr,
                           header_offset), file=sys.stderr)
                    raise
                self.fp.write(centdir)
                self.fp.write(filename)
                self.fp.write(extra_data)
                self.fp.write(zinfo.comment)

            # ---- end-of-archive record(s) ----
            dir_end = self.fp.tell()
            centDirCount = entry_count
            centDirSize = dir_end - dir_start
            centDirOffset = dir_start
            needs_zip64 = (centDirCount >= ZIP_FILECOUNT_LIMIT or
                           centDirOffset > ZIP64_LIMIT or
                           centDirSize > ZIP64_LIMIT)
            if needs_zip64:
                # ZIP64 end-of-archive record followed by its locator
                self.fp.write(struct.pack(
                    structEndArchive64, stringEndArchive64,
                    44, 45, 45, 0, 0, centDirCount, centDirCount,
                    centDirSize, centDirOffset))
                self.fp.write(struct.pack(
                    structEndArchive64Locator,
                    stringEndArchive64Locator, 0, dir_end, 1))
                # The classic record can only hold the clamped values.
                centDirCount = min(centDirCount, 0xFFFF)
                centDirSize = min(centDirSize, 0xFFFFFFFF)
                centDirOffset = min(centDirOffset, 0xFFFFFFFF)

            self.fp.write(struct.pack(
                structEndArchive, stringEndArchive,
                0, 0, centDirCount, centDirCount,
                centDirSize, centDirOffset, len(self._comment)))
            self.fp.write(self._comment)
            self.fp.flush()
    finally:
        # Detach the file object first so the archive counts as closed
        # even if closing the file itself fails.
        handle = self.fp
        self.fp = None
        if not self._filePassed:
            handle.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001500
1501
class PyZipFile(ZipFile):
    """Class to create ZIP archives with Python library files and packages."""

    def __init__(self, file, mode="r", compression=ZIP_STORED,
                 allowZip64=False, optimize=-1):
        """Open the archive like ZipFile and remember the 'optimize' level.

        'optimize' selects which compiled file writepy() stores: -1 (the
        default) uses whichever up-to-date compiled file is already
        present, 0 stores module.pyc and any other value stores
        module.pyo, compiling with that optimization level when the
        compiled file is missing or older than the source.
        """
        ZipFile.__init__(self, file, mode=mode, compression=compression,
                         allowZip64=allowZip64)
        self._optimize = optimize

    def writepy(self, pathname, basename=""):
        """Add all files from "pathname" to the ZIP archive.

        If pathname is a package directory, search the directory and
        all package subdirectories recursively for all *.py and enter
        the modules into the archive.  If pathname is a plain
        directory, listdir *.py and enter all modules.  Else, pathname
        must be a Python *.py file and the module will be put into the
        archive.  Added modules are module.pyo or module.pyc; the module
        is compiled if necessary, and if compilation fails the *.py
        source itself is added instead.

        'basename' is the archive directory prefix under which the
        modules are stored.

        Raises RuntimeError if pathname is a file not ending in ".py".
        """
        name = os.path.split(pathname)[1]
        if os.path.isdir(pathname):
            initname = os.path.join(pathname, "__init__.py")
            if os.path.isfile(initname):
                # This is a package directory, add it
                if basename:
                    basename = "%s/%s" % (basename, name)
                else:
                    basename = name
                if self.debug:
                    print("Adding package in", pathname, "as", basename)
                fname, arcname = self._get_codename(initname[0:-3], basename)
                if self.debug:
                    print("Adding", arcname)
                self.write(fname, arcname)
                dirlist = os.listdir(pathname)
                # __init__.py has just been added above
                dirlist.remove("__init__.py")
                # Add all *.py files and package subdirectories
                for filename in dirlist:
                    path = os.path.join(pathname, filename)
                    root, ext = os.path.splitext(filename)
                    if os.path.isdir(path):
                        if os.path.isfile(os.path.join(path, "__init__.py")):
                            # This is a package directory, add it
                            self.writepy(path, basename)  # Recursive call
                    elif ext == ".py":
                        fname, arcname = self._get_codename(path[0:-3],
                                                            basename)
                        if self.debug:
                            print("Adding", arcname)
                        self.write(fname, arcname)
            else:
                # This is NOT a package directory, add its files at top level
                if self.debug:
                    print("Adding files from directory", pathname)
                for filename in os.listdir(pathname):
                    path = os.path.join(pathname, filename)
                    root, ext = os.path.splitext(filename)
                    if ext == ".py":
                        fname, arcname = self._get_codename(path[0:-3],
                                                            basename)
                        if self.debug:
                            print("Adding", arcname)
                        self.write(fname, arcname)
        else:
            if pathname[-3:] != ".py":
                raise RuntimeError(
                    'Files added with writepy() must end with ".py"')
            fname, arcname = self._get_codename(pathname[0:-3], basename)
            if self.debug:
                print("Adding file", arcname)
            self.write(fname, arcname)

    def _get_codename(self, pathname, basename):
        """Return (filename, archivename) for the path.

        Given a module name path, return the correct file path and
        archive name, compiling if necessary.  For example, given
        /python/lib/string, return (/python/lib/string.pyc, string).

        'filename' is the file on disk to store; 'archivename' is its
        legacy-style file name, prefixed with 'basename' and a slash when
        'basename' is not empty.  If the module cannot be compiled, both
        refer to the *.py source file.
        """
        def _compile(file, optimize=-1):
            """Byte-compile 'file'; return True on success, else False."""
            import py_compile
            if self.debug:
                print("Compiling", file)
            try:
                py_compile.compile(file, doraise=True, optimize=optimize)
            except py_compile.PyCompileError as err:
                # Report the problem and let the caller fall back to the
                # plain source file.
                print(err.msg)
                return False
            return True

        file_py = pathname + ".py"
        file_pyc = pathname + ".pyc"
        file_pyo = pathname + ".pyo"
        pycache_pyc = imp.cache_from_source(file_py, True)
        pycache_pyo = imp.cache_from_source(file_py, False)

        def _up_to_date(candidate):
            """True if 'candidate' exists and is not older than the source."""
            return (os.path.isfile(candidate) and
                    os.stat(candidate).st_mtime >= os.stat(file_py).st_mtime)

        if self._optimize == -1:
            # legacy mode: use whatever file is present
            if _up_to_date(file_pyo):
                # Use .pyo file.
                arcname = fname = file_pyo
            elif _up_to_date(file_pyc):
                # Use .pyc file.
                arcname = fname = file_pyc
            elif _up_to_date(pycache_pyc):
                # Use the __pycache__/*.pyc file, but write it to the legacy
                # pyc file name in the archive.
                fname = pycache_pyc
                arcname = file_pyc
            elif _up_to_date(pycache_pyo):
                # Use the __pycache__/*.pyo file, but write it to the legacy
                # pyo file name in the archive.
                fname = pycache_pyo
                arcname = file_pyo
            else:
                # Compile py into PEP 3147 pyc file.
                if _compile(file_py):
                    fname = (pycache_pyc if __debug__ else pycache_pyo)
                    arcname = (file_pyc if __debug__ else file_pyo)
                else:
                    fname = arcname = file_py
        else:
            # new mode: use given optimization level
            if self._optimize == 0:
                fname = pycache_pyc
                arcname = file_pyc
            else:
                fname = pycache_pyo
                arcname = file_pyo
            if not _up_to_date(fname):
                if not _compile(file_py, optimize=self._optimize):
                    fname = arcname = file_py
        archivename = os.path.split(arcname)[1]
        if basename:
            archivename = "%s/%s" % (basename, archivename)
        return (fname, archivename)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001644
1645
def main(args=None):
    """Run the zipfile command-line interface.

    'args' is the list of command-line arguments; when None, sys.argv[1:]
    is used.  The first argument selects the action:

        -l zipfile.zip          list the archive contents
        -t zipfile.zip          test the archive for corrupted members
        -e zipfile.zip target   extract the archive into directory 'target'
        -c zipfile.zip src ...  create an archive from files/directories

    On a missing or unknown action, or a wrong number of arguments, the
    usage text is printed and the process exits with status 1.
    """
    import textwrap
    USAGE = textwrap.dedent("""\
        Usage:
            zipfile.py -l zipfile.zip        # Show listing of a zipfile
            zipfile.py -t zipfile.zip        # Test if a zipfile is valid
            zipfile.py -e zipfile.zip target # Extract zipfile into target dir
            zipfile.py -c zipfile.zip src ... # Create zipfile from sources
        """)
    if args is None:
        args = sys.argv[1:]

    if not args or args[0] not in ('-l', '-c', '-e', '-t'):
        print(USAGE)
        sys.exit(1)

    if args[0] == '-l':
        if len(args) != 2:
            print(USAGE)
            sys.exit(1)
        with ZipFile(args[1], 'r') as zf:
            zf.printdir()

    elif args[0] == '-t':
        if len(args) != 2:
            print(USAGE)
            sys.exit(1)
        with ZipFile(args[1], 'r') as zf:
            badfile = zf.testzip()
        if badfile:
            print("The following enclosed file is corrupted: {!r}".format(badfile))
        print("Done testing")

    elif args[0] == '-e':
        if len(args) != 3:
            print(USAGE)
            sys.exit(1)

        # Let the archive object do the extraction: opening every member
        # name as a file fails on directory entries such as "pkg/", and
        # joining raw member names onto the target lets absolute names
        # escape it.
        with ZipFile(args[1], 'r') as zf:
            zf.extractall(args[2])

    elif args[0] == '-c':
        if len(args) < 3:
            print(USAGE)
            sys.exit(1)

        def addToZip(zf, path, zippath):
            """Add file 'path', or recursively the files below directory
            'path', to the archive under the name 'zippath'."""
            if os.path.isfile(path):
                zf.write(path, zippath, ZIP_DEFLATED)
            elif os.path.isdir(path):
                for nm in os.listdir(path):
                    addToZip(zf,
                             os.path.join(path, nm), os.path.join(zippath, nm))
            # else: ignore

        with ZipFile(args[1], 'w', allowZip64=True) as zf:
            for src in args[2:]:
                zippath = os.path.basename(src)
                if not zippath:
                    # "dir/" has an empty basename; use the directory name
                    zippath = os.path.basename(os.path.dirname(src))
                if zippath in ('', os.curdir, os.pardir):
                    # "." and ".." carry no usable name: store at the root
                    zippath = ''
                addToZip(zf, src, zippath)


if __name__ == "__main__":
    main()