blob: 209dc4a0f30bf1a86d385306f38c4d0696859a23 [file] [log] [blame]
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001"""
2Read and write ZIP files.
Guido van Rossumd6ca5462007-05-22 01:29:33 +00003
4XXX references to utf-8 need further investigation.
Thomas Wouters0e3f5912006-08-11 14:57:12 +00005"""
Antoine Pitroua32f9a22010-01-27 21:18:57 +00006import io
Barry Warsaw28a691b2010-04-17 00:19:56 +00007import os
Antoine Pitroua32f9a22010-01-27 21:18:57 +00008import re
Barry Warsaw28a691b2010-04-17 00:19:56 +00009import imp
10import sys
11import time
12import stat
13import shutil
14import struct
15import binascii
16
Guido van Rossum32abe6f2000-03-31 17:30:02 +000017
18try:
Tim Peterse1190062001-01-15 03:34:38 +000019 import zlib # We may need its compression method
Christian Heimesd5e2b6f2008-03-19 21:50:51 +000020 crc32 = zlib.crc32
Guido van Rossum9c673f32001-04-10 15:37:12 +000021except ImportError:
Guido van Rossum32abe6f2000-03-31 17:30:02 +000022 zlib = None
Christian Heimesd5e2b6f2008-03-19 21:50:51 +000023 crc32 = binascii.crc32
Guido van Rossum32abe6f2000-03-31 17:30:02 +000024
Martin v. Löwisf6b16a42012-05-01 07:58:44 +020025try:
26 import bz2 # We may need its compression method
27except ImportError:
28 bz2 = None
29
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +020030try:
31 import lzma # We may need its compression method
32except ImportError:
33 lzma = None
34
# Public API of this module: the names exported by a star-import.
# "BadZipfile" and "error" are older aliases of BadZipFile.
__all__ = ["BadZipFile", "BadZipfile", "error",
           "ZIP_STORED", "ZIP_DEFLATED", "ZIP_BZIP2", "ZIP_LZMA",
           "is_zipfile", "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile"]
Skip Montanaro40fc1602001-03-01 04:27:19 +000038
class BadZipFile(Exception):
    """Raised for ZIP archives this module cannot handle.

    For example, it is raised when the ZIP64 locator describes an archive
    that spans multiple disks.
    """
    pass
Thomas Wouters0e3f5912006-08-11 14:57:12 +000041
42
class LargeZipFile(Exception):
    """
    Raised when writing a zipfile that requires ZIP64 extensions
    while those extensions are disabled.
    """
48
error = BadZipfile = BadZipFile      # Pre-3.2 compatibility names


# Sizes above ZIP64_LIMIT make ZipInfo.FileHeader() emit a ZIP64 extra field.
ZIP64_LIMIT = (1 << 31) - 1
# NOTE(review): presumably the entry-count threshold for ZIP64; not used in
# the part of the file shown here -- confirm against the writer code.
ZIP_FILECOUNT_LIMIT = 1 << 16
# Largest value that fits in an unsigned 16-bit field.
ZIP_MAX_COMMENT = (1 << 16) - 1

# constants for Zip file compression methods
ZIP_STORED = 0
ZIP_DEFLATED = 8
ZIP_BZIP2 = 12
ZIP_LZMA = 14
# Other ZIP compression methods not supported

# Values for the "create version" / "extract version" header fields.
# ZipInfo.FileHeader() raises both fields to at least the matching minimum
# when a member needs ZIP64, bzip2 or LZMA.
DEFAULT_VERSION = 20
ZIP64_VERSION = 45
BZIP2_VERSION = 46
LZMA_VERSION = 63
# we recognize (but not necessarily support) all features up to that version
MAX_EXTRACT_VERSION = 63

# Below are some formats and associated data for reading/writing headers using
# the struct module.  The names and structures of headers/records are those used
# in the PKWARE description of the ZIP file format:
#     http://www.pkware.com/documents/casestudies/APPNOTE.TXT
# (URL valid as of January 2008)

# The "end of central directory" structure, magic number, size, and indices
# (section V.I in the format document)
structEndArchive = b"<4s4H2LH"
stringEndArchive = b"PK\005\006"
sizeEndCentDir = struct.calcsize(structEndArchive)

# Indices into the list returned by _EndRecData()
_ECD_SIGNATURE = 0
_ECD_DISK_NUMBER = 1
_ECD_DISK_START = 2
_ECD_ENTRIES_THIS_DISK = 3
_ECD_ENTRIES_TOTAL = 4
_ECD_SIZE = 5
_ECD_OFFSET = 6
_ECD_COMMENT_SIZE = 7
# These last two indices are not part of the structure as defined in the
# spec, but they are used internally by this module as a convenience
_ECD_COMMENT = 8
_ECD_LOCATION = 9

# The "central directory" structure, magic number, size, and indices
# of entries in the structure (section V.F in the format document)
structCentralDir = "<4s4B4HL2L5H2L"
stringCentralDir = b"PK\001\002"
sizeCentralDir = struct.calcsize(structCentralDir)

# indexes of entries in the central directory structure
_CD_SIGNATURE = 0
_CD_CREATE_VERSION = 1
_CD_CREATE_SYSTEM = 2
_CD_EXTRACT_VERSION = 3
_CD_EXTRACT_SYSTEM = 4
_CD_FLAG_BITS = 5
_CD_COMPRESS_TYPE = 6
_CD_TIME = 7
_CD_DATE = 8
_CD_CRC = 9
_CD_COMPRESSED_SIZE = 10
_CD_UNCOMPRESSED_SIZE = 11
_CD_FILENAME_LENGTH = 12
_CD_EXTRA_FIELD_LENGTH = 13
_CD_COMMENT_LENGTH = 14
_CD_DISK_NUMBER_START = 15
_CD_INTERNAL_FILE_ATTRIBUTES = 16
_CD_EXTERNAL_FILE_ATTRIBUTES = 17
_CD_LOCAL_HEADER_OFFSET = 18

# The "local file header" structure, magic number, size, and indices
# (section V.A in the format document)
structFileHeader = "<4s2B4HL2L2H"
stringFileHeader = b"PK\003\004"
sizeFileHeader = struct.calcsize(structFileHeader)

# indexes of entries in the local file header structure
_FH_SIGNATURE = 0
_FH_EXTRACT_VERSION = 1
_FH_EXTRACT_SYSTEM = 2
_FH_GENERAL_PURPOSE_FLAG_BITS = 3
_FH_COMPRESSION_METHOD = 4
_FH_LAST_MOD_TIME = 5
_FH_LAST_MOD_DATE = 6
_FH_CRC = 7
_FH_COMPRESSED_SIZE = 8
_FH_UNCOMPRESSED_SIZE = 9
_FH_FILENAME_LENGTH = 10
_FH_EXTRA_FIELD_LENGTH = 11

# The "Zip64 end of central directory locator" structure, magic number, and size
structEndArchive64Locator = "<4sLQL"
stringEndArchive64Locator = b"PK\x06\x07"
sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator)

# The "Zip64 end of central directory" record, magic number, size, and indices
# (section V.G in the format document)
structEndArchive64 = "<4sQ2H2L4Q"
stringEndArchive64 = b"PK\x06\x06"
sizeEndCentDir64 = struct.calcsize(structEndArchive64)

# indexes of entries in the Zip64 end of central directory record
_CD64_SIGNATURE = 0
_CD64_DIRECTORY_RECSIZE = 1
_CD64_CREATE_VERSION = 2
_CD64_EXTRACT_VERSION = 3
_CD64_DISK_NUMBER = 4
_CD64_DISK_NUMBER_START = 5
_CD64_NUMBER_ENTRIES_THIS_DISK = 6
_CD64_NUMBER_ENTRIES_TOTAL = 7
_CD64_DIRECTORY_SIZE = 8
_CD64_OFFSET_START_CENTDIR = 9
162
def _check_zipfile(fp):
    """Return True if the open file object *fp* ends with a ZIP end record.

    An IOError raised while probing the file is treated as "not a ZIP file".
    """
    try:
        end_record = _EndRecData(fp)
    except IOError:
        return False
    # _EndRecData() yields a non-empty list on success and None otherwise.
    return True if end_record else False
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000170
def is_zipfile(filename):
    """Quickly see if a file is a ZIP file by checking the magic number.

    The filename argument may be a path, or an already-open file or
    file-like object (anything with a "read" attribute).  An IOError while
    opening or probing the file yields False.
    """
    try:
        if hasattr(filename, "read"):
            # Caller supplied a file object: probe it, do not close it.
            return _check_zipfile(fp=filename)
        with open(filename, "rb") as fp:
            return _check_zipfile(fp)
    except IOError:
        return False
186
def _EndRecData64(fpin, offset, endrec):
    """
    Read the ZIP64 end-of-archive records and use that to update endrec.

    fpin is a seekable binary file object.  offset is the position of the
    classic "end of central directory" record relative to the end of the
    file (a negative number).  endrec is the list built by _EndRecData();
    it is updated in place and returned.

    endrec is returned unchanged when no valid ZIP64 locator/record is
    found immediately before the classic record, including when the file is
    too short to contain them.  BadZipFile is raised for archives that span
    multiple disks.
    """
    try:
        fpin.seek(offset - sizeEndCentDir64Locator, 2)
    except IOError:
        # If the seek fails, the file is not large enough to contain a ZIP64
        # end-of-archive record, so just return the end record we were given.
        return endrec

    data = fpin.read(sizeEndCentDir64Locator)
    if len(data) != sizeEndCentDir64Locator:
        # Short read: there is no room for a locator, so this cannot be a
        # ZIP64 archive.  Unpacking would raise struct.error.
        return endrec
    sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, data)
    if sig != stringEndArchive64Locator:
        return endrec

    if diskno != 0 or disks != 1:
        raise BadZipFile("zipfiles that span multiple disks are not supported")

    # Assume no 'zip64 extensible data'
    fpin.seek(offset - sizeEndCentDir64Locator - sizeEndCentDir64, 2)
    data = fpin.read(sizeEndCentDir64)
    if len(data) != sizeEndCentDir64:
        # Truncated ZIP64 record: keep the values from the classic record.
        return endrec
    sig, sz, create_version, read_version, disk_num, disk_dir, \
        dircount, dircount2, dirsize, diroffset = \
        struct.unpack(structEndArchive64, data)
    if sig != stringEndArchive64:
        return endrec

    # Update the original endrec using data from the ZIP64 record
    endrec[_ECD_SIGNATURE] = sig
    endrec[_ECD_DISK_NUMBER] = disk_num
    endrec[_ECD_DISK_START] = disk_dir
    endrec[_ECD_ENTRIES_THIS_DISK] = dircount
    endrec[_ECD_ENTRIES_TOTAL] = dircount2
    endrec[_ECD_SIZE] = dirsize
    endrec[_ECD_OFFSET] = diroffset
    return endrec
224
225
def _EndRecData(fpin):
    """Return data from the "End of Central Directory" record, or None.

    fpin is a seekable binary file object.  On success the result is a list
    holding the eight fields of the ZIP "End of central dir" record, then
    the archive comment (bytes), then the file offset at which the record
    was found (see the _ECD_* indices).  Values are replaced by those of
    the ZIP64 record when one is present.

    None is returned when no complete record can be found, including when
    the data following the magic number is truncated.
    """

    # Determine file size
    fpin.seek(0, 2)
    filesize = fpin.tell()

    # Check to see if this is ZIP file with no archive comment (the
    # "end of central directory" structure should be the last item in the
    # file if this is the case).
    try:
        fpin.seek(-sizeEndCentDir, 2)
    except IOError:
        return None
    data = fpin.read()
    # The length check keeps a short read (file smaller than the record)
    # from reaching struct.unpack, which would raise struct.error.
    if (len(data) == sizeEndCentDir and
            data[0:4] == stringEndArchive and
            data[-2:] == b"\000\000"):
        # the signature is correct and there's no comment, unpack structure
        endrec = list(struct.unpack(structEndArchive, data))

        # Append a blank comment and record start offset
        endrec.append(b"")
        endrec.append(filesize - sizeEndCentDir)

        # Try to read the "Zip64 end of central directory" structure
        return _EndRecData64(fpin, -sizeEndCentDir, endrec)

    # Either this is not a ZIP file, or it is a ZIP file with an archive
    # comment.  Search the end of the file for the "end of central directory"
    # record signature. The comment is the last item in the ZIP file and may be
    # up to 64K long.  It is assumed that the "end of central directory" magic
    # number does not appear in the comment.
    maxCommentStart = max(filesize - (1 << 16) - sizeEndCentDir, 0)
    fpin.seek(maxCommentStart, 0)
    data = fpin.read()
    start = data.rfind(stringEndArchive)
    if start >= 0:
        # found the magic number; attempt to unpack and interpret
        recData = data[start:start+sizeEndCentDir]
        if len(recData) != sizeEndCentDir:
            # The record is cut off by the end of the file: corrupt archive.
            return None
        endrec = list(struct.unpack(structEndArchive, recData))
        commentSize = endrec[_ECD_COMMENT_SIZE]  # as claimed by the zip file
        comment = data[start+sizeEndCentDir:start+sizeEndCentDir+commentSize]
        endrec.append(comment)
        endrec.append(maxCommentStart + start)

        # Try to read the "Zip64 end of central directory" structure
        return _EndRecData64(fpin, maxCommentStart + start - filesize,
                             endrec)

    # Unable to find a valid end of central directory structure
    return None
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000280
Fred Drake484d7352000-10-02 21:14:52 +0000281
class ZipInfo(object):
    """Class with attributes describing each file in the ZIP archive."""

    __slots__ = (
        'orig_filename',
        'filename',
        'date_time',
        'compress_type',
        'comment',
        'extra',
        'create_system',
        'create_version',
        'extract_version',
        'reserved',
        'flag_bits',
        'volume',
        'internal_attr',
        'external_attr',
        'header_offset',
        'CRC',
        'compress_size',
        'file_size',
        '_raw_time',
    )

    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
        """Create the description of one archive member.

        filename is the member name; it is cut at the first null character
        and os.sep is replaced by "/".  date_time is a sequence
        (year, month, day, hour, min, sec).

        Raises ValueError if the year is before 1980.
        """
        self.orig_filename = filename   # Original file name in archive

        # Terminate the file name at the first null byte.  Null bytes in file
        # names are used as tricks by viruses in archives.
        null_byte = filename.find(chr(0))
        if null_byte >= 0:
            filename = filename[0:null_byte]
        # This is used to ensure paths in generated ZIP files always use
        # forward slashes as the directory separator, as required by the
        # ZIP format specification.
        if os.sep != "/" and os.sep in filename:
            filename = filename.replace(os.sep, "/")

        self.filename = filename        # Normalized file name
        self.date_time = date_time      # year, month, day, hour, min, sec

        if date_time[0] < 1980:
            raise ValueError('ZIP does not support timestamps before 1980')

        # Standard values:
        self.compress_type = ZIP_STORED # Type of compression for the file
        self.comment = b""              # Comment for each file
        self.extra = b""                # ZIP extra data
        if sys.platform == 'win32':
            self.create_system = 0          # System which created ZIP archive
        else:
            # Assume everything else is unix-y
            self.create_system = 3          # System which created ZIP archive
        self.create_version = DEFAULT_VERSION  # Version which created ZIP archive
        self.extract_version = DEFAULT_VERSION # Version needed to extract archive
        self.reserved = 0               # Must be zero
        self.flag_bits = 0              # ZIP flag bits
        self.volume = 0                 # Volume number of file header
        self.internal_attr = 0          # Internal attributes
        self.external_attr = 0          # External file attributes
        # Other attributes are set by class ZipFile:
        # header_offset         Byte offset to the file header
        # CRC                   CRC-32 of the uncompressed file
        # compress_size         Size of the compressed file
        # file_size             Size of the uncompressed file

    def FileHeader(self):
        """Return the per-file (local) header as bytes.

        The result is the packed fixed-size header followed by the encoded
        file name and the extra field.  As a side effect, extract_version
        and create_version are raised to the minimum needed for ZIP64,
        bzip2 or LZMA members.
        """
        dt = self.date_time
        # Pack the timestamp into the 16-bit date and time fields; seconds
        # are stored with two-second resolution.
        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
        if self.flag_bits & 0x08:
            # Set these to zero because we write them after the file data
            CRC = compress_size = file_size = 0
        else:
            CRC = self.CRC
            compress_size = self.compress_size
            file_size = self.file_size

        extra = self.extra

        min_version = 0
        if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
            # File is larger than what fits into a 4 byte integer,
            # fall back to the ZIP64 extension
            fmt = '<HHQQ'
            extra = extra + struct.pack(fmt,
                    1, struct.calcsize(fmt)-4, file_size, compress_size)
            # The real sizes live in the extra field; flag that in the header.
            file_size = 0xffffffff
            compress_size = 0xffffffff
            min_version = ZIP64_VERSION

        if self.compress_type == ZIP_BZIP2:
            min_version = max(BZIP2_VERSION, min_version)
        elif self.compress_type == ZIP_LZMA:
            min_version = max(LZMA_VERSION, min_version)

        self.extract_version = max(min_version, self.extract_version)
        self.create_version = max(min_version, self.create_version)
        filename, flag_bits = self._encodeFilenameFlags()
        header = struct.pack(structFileHeader, stringFileHeader,
                             self.extract_version, self.reserved, flag_bits,
                             self.compress_type, dostime, dosdate, CRC,
                             compress_size, file_size,
                             len(filename), len(extra))
        return header + filename + extra

    def _encodeFilenameFlags(self):
        """Return (encoded filename, flag bits) for writing a header.

        ASCII names are returned with flag_bits unchanged; any other name is
        encoded as UTF-8 and bit 0x800 is set in the returned flags.
        """
        try:
            return self.filename.encode('ascii'), self.flag_bits
        except UnicodeEncodeError:
            return self.filename.encode('utf-8'), self.flag_bits | 0x800

    def _decodeExtra(self):
        """Decode the extra field and apply any ZIP64 sizes/offset found.

        Walks the (type, length, payload) records in self.extra.  For a
        type-1 (ZIP64) record, file_size, compress_size and header_offset
        are replaced, in that order, when they hold the 0xffffffff
        placeholder.

        Raises RuntimeError for a ZIP64 record of unexpected length.
        """
        extra = self.extra
        unpack = struct.unpack
        # Every record starts with a 4-byte header; fewer than 4 trailing
        # bytes cannot be a record and would make unpack() raise.
        while len(extra) >= 4:
            tp, ln = unpack('<HH', extra[:4])
            if tp == 1:
                if ln >= 24:
                    counts = unpack('<QQQ', extra[4:28])
                elif ln == 16:
                    counts = unpack('<QQ', extra[4:20])
                elif ln == 8:
                    counts = unpack('<Q', extra[4:12])
                elif ln == 0:
                    counts = ()
                else:
                    raise RuntimeError("Corrupt extra field %s"%(ln,))

                idx = 0

                # ZIP64 extension (large files and/or large archives)
                if self.file_size in (0xffffffffffffffff, 0xffffffff):
                    self.file_size = counts[idx]
                    idx += 1

                if self.compress_size == 0xFFFFFFFF:
                    self.compress_size = counts[idx]
                    idx += 1

                if self.header_offset == 0xffffffff:
                    self.header_offset = counts[idx]
                    idx += 1

            extra = extra[ln+4:]
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000431
432
Thomas Wouterscf297e42007-02-23 15:07:44 +0000433class _ZipDecrypter:
434 """Class to handle decryption of files stored within a ZIP archive.
435
436 ZIP supports a password-based form of encryption. Even though known
437 plaintext attacks have been found against it, it is still useful
Christian Heimesfdab48e2008-01-20 09:06:41 +0000438 to be able to get data out of such a file.
Thomas Wouterscf297e42007-02-23 15:07:44 +0000439
440 Usage:
441 zd = _ZipDecrypter(mypwd)
442 plain_char = zd(cypher_char)
443 plain_text = map(zd, cypher_text)
444 """
445
446 def _GenerateCRCTable():
447 """Generate a CRC-32 table.
448
449 ZIP encryption uses the CRC32 one-byte primitive for scrambling some
450 internal keys. We noticed that a direct implementation is faster than
451 relying on binascii.crc32().
452 """
453 poly = 0xedb88320
454 table = [0] * 256
455 for i in range(256):
456 crc = i
457 for j in range(8):
458 if crc & 1:
459 crc = ((crc >> 1) & 0x7FFFFFFF) ^ poly
460 else:
461 crc = ((crc >> 1) & 0x7FFFFFFF)
462 table[i] = crc
463 return table
464 crctable = _GenerateCRCTable()
465
466 def _crc32(self, ch, crc):
467 """Compute the CRC32 primitive on one byte."""
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000468 return ((crc >> 8) & 0xffffff) ^ self.crctable[(crc ^ ch) & 0xff]
Thomas Wouterscf297e42007-02-23 15:07:44 +0000469
470 def __init__(self, pwd):
471 self.key0 = 305419896
472 self.key1 = 591751049
473 self.key2 = 878082192
474 for p in pwd:
475 self._UpdateKeys(p)
476
477 def _UpdateKeys(self, c):
478 self.key0 = self._crc32(c, self.key0)
479 self.key1 = (self.key1 + (self.key0 & 255)) & 4294967295
480 self.key1 = (self.key1 * 134775813 + 1) & 4294967295
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000481 self.key2 = self._crc32((self.key1 >> 24) & 255, self.key2)
Thomas Wouterscf297e42007-02-23 15:07:44 +0000482
483 def __call__(self, c):
484 """Decrypt a single character."""
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000485 assert isinstance(c, int)
Thomas Wouterscf297e42007-02-23 15:07:44 +0000486 k = self.key2 | 2
487 c = c ^ (((k * (k^1)) >> 8) & 255)
Thomas Wouterscf297e42007-02-23 15:07:44 +0000488 self._UpdateKeys(c)
489 return c
490
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200491
class LZMACompressor:
    """Raw LZMA1 compressor that prefixes its output with a small header.

    The first bytes produced are struct '<BBH' (9, 4, len(props)) followed
    by the encoded filter properties; the raw LZMA stream comes after.
    The underlying compressor is created lazily on first use.
    """

    def __init__(self):
        self._comp = None

    def _init(self):
        """Create the underlying compressor and return the header bytes."""
        props = lzma._encode_filter_properties({'id': lzma.FILTER_LZMA1})
        filter_spec = lzma._decode_filter_properties(lzma.FILTER_LZMA1, props)
        self._comp = lzma.LZMACompressor(lzma.FORMAT_RAW,
                                         filters=[filter_spec])
        header = struct.pack('<BBH', 9, 4, len(props))
        return header + props

    def compress(self, data):
        """Compress *data*; the first call also returns the header."""
        if self._comp is not None:
            return self._comp.compress(data)
        header = self._init()
        return header + self._comp.compress(data)

    def flush(self):
        """Finish the stream; emits the header if nothing was compressed."""
        if self._comp is not None:
            return self._comp.flush()
        header = self._init()
        return header + self._comp.flush()
513
514
class LZMADecompressor:
    """Decompressor for data written as a header plus a raw LZMA1 stream.

    Input is buffered until the 4-byte header and the filter properties it
    announces are available together with at least one byte of payload;
    the underlying decompressor is created only then.
    """

    def __init__(self):
        self._decomp = None
        self._unconsumed = b''
        self.eof = False

    def decompress(self, data):
        """Feed *data* and return whatever decompressed bytes are ready."""
        if self._decomp is None:
            self._unconsumed += data
            pending = self._unconsumed
            if len(pending) <= 4:
                return b''
            # Bytes 2-3 hold the little-endian length of the properties.
            psize, = struct.unpack('<H', pending[2:4])
            payload_start = 4 + psize
            if len(pending) <= payload_start:
                return b''

            filter_spec = lzma._decode_filter_properties(
                lzma.FILTER_LZMA1, pending[4:payload_start])
            self._decomp = lzma.LZMADecompressor(lzma.FORMAT_RAW,
                                                 filters=[filter_spec])
            data = pending[payload_start:]
            # The buffer is no longer needed once the header is parsed.
            del self._unconsumed

        result = self._decomp.decompress(data)
        self.eof = self._decomp.eof
        return result
541
542
# Human-readable names for ZIP compression method numbers.  Used by
# _get_decompressor() to build the error message for unsupported methods.
compressor_names = {
    0: 'store',
    1: 'shrink',
    2: 'reduce',
    3: 'reduce',
    4: 'reduce',
    5: 'reduce',
    6: 'implode',
    7: 'tokenize',
    8: 'deflate',
    9: 'deflate64',
    10: 'implode',
    12: 'bzip2',
    14: 'lzma',
    18: 'terse',
    19: 'lz77',
    97: 'wavpack',
    98: 'ppmd',
}
562
def _check_compression(compression):
    """Validate that *compression* is a usable compression method.

    Returns None on success.  Raises RuntimeError when the method needs a
    module that could not be imported, or when the method is unknown.
    """
    if compression == ZIP_STORED:
        return
    requirements = (
        (ZIP_DEFLATED, zlib, "Compression requires the (missing) zlib module"),
        (ZIP_BZIP2, bz2, "Compression requires the (missing) bz2 module"),
        (ZIP_LZMA, lzma, "Compression requires the (missing) lzma module"),
    )
    for method, module, message in requirements:
        if compression == method:
            # The module global is None when its import failed.
            if not module:
                raise RuntimeError(message)
            return
    raise RuntimeError("That compression method is not supported")
580
581
def _get_compressor(compress_type):
    """Return a new compressor object for *compress_type*.

    None is returned for any method other than deflate, bzip2 and LZMA.
    """
    if compress_type == ZIP_DEFLATED:
        # Negative window bits: raw deflate stream without a zlib header.
        return zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                                zlib.DEFLATED, -15)
    if compress_type == ZIP_BZIP2:
        return bz2.BZ2Compressor()
    if compress_type == ZIP_LZMA:
        return LZMACompressor()
    return None
592
593
def _get_decompressor(compress_type):
    """Return a new decompressor object for *compress_type*.

    None is returned for stored (uncompressed) members.  Raises
    NotImplementedError for any other unsupported method, naming the
    method when it is listed in compressor_names.
    """
    if compress_type == ZIP_STORED:
        return None
    if compress_type == ZIP_DEFLATED:
        # Negative window bits: raw deflate stream without a zlib header.
        return zlib.decompressobj(-15)
    if compress_type == ZIP_BZIP2:
        return bz2.BZ2Decompressor()
    if compress_type == ZIP_LZMA:
        return LZMADecompressor()

    descr = compressor_names.get(compress_type)
    if not descr:
        raise NotImplementedError("compression type %d" % (compress_type,))
    raise NotImplementedError("compression type %d (%s)" % (compress_type, descr))
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200609
610
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000611class ZipExtFile(io.BufferedIOBase):
Guido van Rossumd8faa362007-04-27 19:54:29 +0000612 """File-like object for reading an archive member.
613 Is returned by ZipFile.open().
614 """
615
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000616 # Max size supported by decompressor.
617 MAX_N = 1 << 31 - 1
Guido van Rossumd8faa362007-04-27 19:54:29 +0000618
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000619 # Read from compressed files in 4k blocks.
620 MIN_READ_SIZE = 4096
Guido van Rossumd8faa362007-04-27 19:54:29 +0000621
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000622 # Search for universal newlines or line chunks.
623 PATTERN = re.compile(br'^(?P<chunk>[^\r\n]+)|(?P<newline>\n|\r\n?)')
624
Łukasz Langae94980a2010-11-22 23:31:26 +0000625 def __init__(self, fileobj, mode, zipinfo, decrypter=None,
626 close_fileobj=False):
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000627 self._fileobj = fileobj
628 self._decrypter = decrypter
Łukasz Langae94980a2010-11-22 23:31:26 +0000629 self._close_fileobj = close_fileobj
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000630
Ezio Melotti92b47432010-01-28 01:44:41 +0000631 self._compress_type = zipinfo.compress_type
Ezio Melotti92b47432010-01-28 01:44:41 +0000632 self._compress_left = zipinfo.compress_size
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200633 self._left = zipinfo.file_size
Ezio Melotti92b47432010-01-28 01:44:41 +0000634
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200635 self._decompressor = _get_decompressor(self._compress_type)
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000636
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200637 self._eof = False
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000638 self._readbuffer = b''
639 self._offset = 0
640
641 self._universal = 'U' in mode
642 self.newlines = None
643
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000644 # Adjust read size for encrypted files since the first 12 bytes
645 # are for the encryption/password information.
646 if self._decrypter is not None:
647 self._compress_left -= 12
648
649 self.mode = mode
Guido van Rossumd8faa362007-04-27 19:54:29 +0000650 self.name = zipinfo.filename
651
Antoine Pitrou7c8bcb62010-08-12 15:11:50 +0000652 if hasattr(zipinfo, 'CRC'):
653 self._expected_crc = zipinfo.CRC
654 self._running_crc = crc32(b'') & 0xffffffff
655 else:
656 self._expected_crc = None
657
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000658 def readline(self, limit=-1):
659 """Read and return a line from the stream.
Guido van Rossumd8faa362007-04-27 19:54:29 +0000660
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000661 If limit is specified, at most limit bytes will be read.
Guido van Rossumd8faa362007-04-27 19:54:29 +0000662 """
Guido van Rossumd8faa362007-04-27 19:54:29 +0000663
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000664 if not self._universal and limit < 0:
665 # Shortcut common case - newline found in buffer.
666 i = self._readbuffer.find(b'\n', self._offset) + 1
667 if i > 0:
668 line = self._readbuffer[self._offset: i]
669 self._offset = i
670 return line
Guido van Rossumd8faa362007-04-27 19:54:29 +0000671
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000672 if not self._universal:
673 return io.BufferedIOBase.readline(self, limit)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000674
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000675 line = b''
676 while limit < 0 or len(line) < limit:
677 readahead = self.peek(2)
678 if readahead == b'':
679 return line
Guido van Rossumd8faa362007-04-27 19:54:29 +0000680
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000681 #
682 # Search for universal newlines or line chunks.
683 #
684 # The pattern returns either a line chunk or a newline, but not
685 # both. Combined with peek(2), we are assured that the sequence
686 # '\r\n' is always retrieved completely and never split into
687 # separate newlines - '\r', '\n' due to coincidental readaheads.
688 #
689 match = self.PATTERN.search(readahead)
690 newline = match.group('newline')
691 if newline is not None:
692 if self.newlines is None:
693 self.newlines = []
694 if newline not in self.newlines:
695 self.newlines.append(newline)
696 self._offset += len(newline)
697 return line + b'\n'
Guido van Rossumd8faa362007-04-27 19:54:29 +0000698
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000699 chunk = match.group('chunk')
700 if limit >= 0:
701 chunk = chunk[: limit - len(line)]
Guido van Rossumd8faa362007-04-27 19:54:29 +0000702
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000703 self._offset += len(chunk)
704 line += chunk
Guido van Rossumd8faa362007-04-27 19:54:29 +0000705
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000706 return line
707
708 def peek(self, n=1):
709 """Returns buffered bytes without advancing the position."""
710 if n > len(self._readbuffer) - self._offset:
711 chunk = self.read(n)
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200712 if len(chunk) > self._offset:
713 self._readbuffer = chunk + self._readbuffer[self._offset:]
714 self._offset = 0
715 else:
716 self._offset -= len(chunk)
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000717
718 # Return up to 512 bytes to reduce allocation overhead for tight loops.
719 return self._readbuffer[self._offset: self._offset + 512]
720
721 def readable(self):
722 return True
723
724 def read(self, n=-1):
725 """Read and return up to n bytes.
726 If the argument is omitted, None, or negative, data is read and returned until EOF is reached..
Guido van Rossumd8faa362007-04-27 19:54:29 +0000727 """
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200728 if n is None or n < 0:
729 buf = self._readbuffer[self._offset:]
730 self._readbuffer = b''
731 self._offset = 0
732 while not self._eof:
733 buf += self._read1(self.MAX_N)
734 return buf
Guido van Rossumd8faa362007-04-27 19:54:29 +0000735
Antoine Pitrou78157b32012-06-23 16:44:48 +0200736 end = n + self._offset
737 if end < len(self._readbuffer):
738 buf = self._readbuffer[self._offset:end]
739 self._offset = end
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200740 return buf
741
Antoine Pitrou78157b32012-06-23 16:44:48 +0200742 n = end - len(self._readbuffer)
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200743 buf = self._readbuffer[self._offset:]
744 self._readbuffer = b''
745 self._offset = 0
746 while n > 0 and not self._eof:
747 data = self._read1(n)
748 if n < len(data):
749 self._readbuffer = data
750 self._offset = n
751 buf += data[:n]
752 break
753 buf += data
754 n -= len(data)
755 return buf
756
757 def _update_crc(self, newdata):
Antoine Pitrou7c8bcb62010-08-12 15:11:50 +0000758 # Update the CRC using the given data.
759 if self._expected_crc is None:
760 # No need to compute the CRC if we don't have a reference value
761 return
762 self._running_crc = crc32(newdata, self._running_crc) & 0xffffffff
763 # Check the CRC if we're at the end of the file
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200764 if self._eof and self._running_crc != self._expected_crc:
Georg Brandl4d540882010-10-28 06:42:33 +0000765 raise BadZipFile("Bad CRC-32 for file %r" % self.name)
Antoine Pitrou7c8bcb62010-08-12 15:11:50 +0000766
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000767 def read1(self, n):
768 """Read up to n bytes with at most one read() system call."""
Guido van Rossumd8faa362007-04-27 19:54:29 +0000769
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200770 if n is None or n < 0:
771 buf = self._readbuffer[self._offset:]
772 self._readbuffer = b''
773 self._offset = 0
774 data = self._read1(self.MAX_N)
775 buf += data
776 return buf
Guido van Rossumd8faa362007-04-27 19:54:29 +0000777
Antoine Pitrou78157b32012-06-23 16:44:48 +0200778 end = n + self._offset
779 if end < len(self._readbuffer):
780 buf = self._readbuffer[self._offset:end]
781 self._offset = end
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200782 return buf
783
Antoine Pitrou78157b32012-06-23 16:44:48 +0200784 n = end - len(self._readbuffer)
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200785 buf = self._readbuffer[self._offset:]
786 self._readbuffer = b''
787 self._offset = 0
788 if n > 0:
789 data = self._read1(n)
790 if n < len(data):
791 self._readbuffer = data
792 self._offset = n
793 data = data[:n]
794 buf += data
795 return buf
796
797 def _read1(self, n):
798 # Read up to n compressed bytes with at most one read() system call,
799 # decrypt and decompress them.
800 if self._eof or n <= 0:
801 return b''
Guido van Rossumd8faa362007-04-27 19:54:29 +0000802
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000803 # Read from file.
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200804 if self._compress_type == ZIP_DEFLATED:
805 ## Handle unconsumed data.
806 data = self._decompressor.unconsumed_tail
807 if n > len(data):
808 data += self._read2(n - len(data))
809 else:
810 data = self._read2(n)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000811
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200812 if self._compress_type == ZIP_STORED:
813 self._eof = self._compress_left <= 0
814 elif self._compress_type == ZIP_DEFLATED:
815 n = max(n, self.MIN_READ_SIZE)
816 data = self._decompressor.decompress(data, n)
817 self._eof = (self._decompressor.eof or
818 self._compress_left <= 0 and
819 not self._decompressor.unconsumed_tail)
820 if self._eof:
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000821 data += self._decompressor.flush()
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200822 else:
823 data = self._decompressor.decompress(data)
824 self._eof = self._decompressor.eof or self._compress_left <= 0
Guido van Rossumd8faa362007-04-27 19:54:29 +0000825
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200826 data = data[:self._left]
827 self._left -= len(data)
828 if self._left <= 0:
829 self._eof = True
830 self._update_crc(data)
831 return data
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000832
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200833 def _read2(self, n):
834 if self._compress_left <= 0:
835 return b''
836
837 n = max(n, self.MIN_READ_SIZE)
838 n = min(n, self._compress_left)
839
840 data = self._fileobj.read(n)
841 self._compress_left -= len(data)
842
843 if self._decrypter is not None:
844 data = bytes(map(self._decrypter, data))
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000845 return data
Guido van Rossumd8faa362007-04-27 19:54:29 +0000846
Łukasz Langae94980a2010-11-22 23:31:26 +0000847 def close(self):
848 try:
849 if self._close_fileobj:
850 self._fileobj.close()
851 finally:
852 super().close()
Guido van Rossumd8faa362007-04-27 19:54:29 +0000853
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000854
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000855class ZipFile:
Tim Petersa19a1682001-03-29 04:36:09 +0000856 """ Class with methods to open, read, write, close, list zip files.
857
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000858 z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=False)
Tim Petersa19a1682001-03-29 04:36:09 +0000859
Fred Drake3d9091e2001-03-26 15:49:24 +0000860 file: Either the path to the file, or a file-like object.
861 If it is a path, the file will be opened and closed by ZipFile.
862 mode: The mode can be either read "r", write "w" or append "a".
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +0200863 compression: ZIP_STORED (no compression), ZIP_DEFLATED (requires zlib),
864 ZIP_BZIP2 (requires bz2) or ZIP_LZMA (requires lzma).
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000865 allowZip64: if True ZipFile will create files with ZIP64 extensions when
866 needed, otherwise it will raise an exception when this would
867 be necessary.
868
Fred Drake3d9091e2001-03-26 15:49:24 +0000869 """
Fred Drake484d7352000-10-02 21:14:52 +0000870
Fred Drake90eac282001-02-28 05:29:34 +0000871 fp = None # Set here since __del__ checks it
872
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000873 def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=False):
Fred Drake484d7352000-10-02 21:14:52 +0000874 """Open the ZIP file with mode read "r", write "w" or append "a"."""
Guido van Rossumb5a755e2007-07-18 18:15:48 +0000875 if mode not in ("r", "w", "a"):
876 raise RuntimeError('ZipFile() requires mode "r", "w", or "a"')
877
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200878 _check_compression(compression)
Guido van Rossumb5a755e2007-07-18 18:15:48 +0000879
880 self._allowZip64 = allowZip64
881 self._didModify = False
Tim Peterse1190062001-01-15 03:34:38 +0000882 self.debug = 0 # Level of printing: 0 through 3
883 self.NameToInfo = {} # Find file info given name
884 self.filelist = [] # List of ZipInfo instances for archive
885 self.compression = compression # Method of compression
Raymond Hettinger2ca7c192005-02-16 09:27:49 +0000886 self.mode = key = mode.replace('b', '')[0]
Thomas Wouterscf297e42007-02-23 15:07:44 +0000887 self.pwd = None
R David Murrayf50b38a2012-04-12 18:44:58 -0400888 self._comment = b''
Tim Petersa19a1682001-03-29 04:36:09 +0000889
Fred Drake3d9091e2001-03-26 15:49:24 +0000890 # Check if we were passed a file-like object
Guido van Rossum3172c5d2007-10-16 18:12:55 +0000891 if isinstance(file, str):
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000892 # No, it's a filename
Fred Drake3d9091e2001-03-26 15:49:24 +0000893 self._filePassed = 0
894 self.filename = file
895 modeDict = {'r' : 'rb', 'w': 'wb', 'a' : 'r+b'}
Thomas Wouterscf297e42007-02-23 15:07:44 +0000896 try:
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000897 self.fp = io.open(file, modeDict[mode])
Thomas Wouterscf297e42007-02-23 15:07:44 +0000898 except IOError:
899 if mode == 'a':
900 mode = key = 'w'
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000901 self.fp = io.open(file, modeDict[mode])
Thomas Wouterscf297e42007-02-23 15:07:44 +0000902 else:
903 raise
Fred Drake3d9091e2001-03-26 15:49:24 +0000904 else:
905 self._filePassed = 1
906 self.fp = file
907 self.filename = getattr(file, 'name', None)
Tim Petersa19a1682001-03-29 04:36:09 +0000908
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000909 if key == 'r':
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000910 self._GetContents()
911 elif key == 'w':
Georg Brandl268e4d42010-10-14 06:59:45 +0000912 # set the modified flag so central directory gets written
913 # even if no files are added to the archive
914 self._didModify = True
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000915 elif key == 'a':
Georg Brandl268e4d42010-10-14 06:59:45 +0000916 try:
917 # See if file is a zip file
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000918 self._RealGetContents()
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000919 # seek to start of directory and overwrite
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000920 self.fp.seek(self.start_dir, 0)
Georg Brandl4d540882010-10-28 06:42:33 +0000921 except BadZipFile:
Georg Brandl268e4d42010-10-14 06:59:45 +0000922 # file is not a zip file, just append
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000923 self.fp.seek(0, 2)
Georg Brandl268e4d42010-10-14 06:59:45 +0000924
925 # set the modified flag so central directory gets written
926 # even if no files are added to the archive
927 self._didModify = True
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000928 else:
Tim Peters7d3bad62001-04-04 18:56:49 +0000929 if not self._filePassed:
930 self.fp.close()
931 self.fp = None
Collin Winterce36ad82007-08-30 01:19:48 +0000932 raise RuntimeError('Mode must be "r", "w" or "a"')
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000933
Ezio Melottifaa6b7f2009-12-30 12:34:59 +0000934 def __enter__(self):
935 return self
936
937 def __exit__(self, type, value, traceback):
938 self.close()
939
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000940 def _GetContents(self):
Tim Peters7d3bad62001-04-04 18:56:49 +0000941 """Read the directory, making sure we close the file if the format
942 is bad."""
943 try:
944 self._RealGetContents()
Georg Brandl4d540882010-10-28 06:42:33 +0000945 except BadZipFile:
Tim Peters7d3bad62001-04-04 18:56:49 +0000946 if not self._filePassed:
947 self.fp.close()
948 self.fp = None
949 raise
950
951 def _RealGetContents(self):
Fred Drake484d7352000-10-02 21:14:52 +0000952 """Read in the table of contents for the ZIP file."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000953 fp = self.fp
Georg Brandl268e4d42010-10-14 06:59:45 +0000954 try:
955 endrec = _EndRecData(fp)
956 except IOError:
Georg Brandl4d540882010-10-28 06:42:33 +0000957 raise BadZipFile("File is not a zip file")
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000958 if not endrec:
Georg Brandl4d540882010-10-28 06:42:33 +0000959 raise BadZipFile("File is not a zip file")
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000960 if self.debug > 1:
Guido van Rossumbe19ed72007-02-09 05:37:30 +0000961 print(endrec)
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000962 size_cd = endrec[_ECD_SIZE] # bytes in central directory
963 offset_cd = endrec[_ECD_OFFSET] # offset of central directory
R David Murrayf50b38a2012-04-12 18:44:58 -0400964 self._comment = endrec[_ECD_COMMENT] # archive comment
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000965
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000966 # "concat" is zero, unless zip was concatenated to another file
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000967 concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
Antoine Pitrou9e4fdf42008-09-05 23:43:02 +0000968 if endrec[_ECD_SIGNATURE] == stringEndArchive64:
969 # If Zip64 extension structures are present, account for them
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000970 concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)
971
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000972 if self.debug > 2:
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000973 inferred = concat + offset_cd
974 print("given, inferred, offset", offset_cd, inferred, concat)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000975 # self.start_dir: Position of start of central directory
976 self.start_dir = offset_cd + concat
977 fp.seek(self.start_dir, 0)
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000978 data = fp.read(size_cd)
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000979 fp = io.BytesIO(data)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000980 total = 0
981 while total < size_cd:
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000982 centdir = fp.read(sizeCentralDir)
Georg Brandl2ee470f2008-07-16 12:55:28 +0000983 if centdir[0:4] != stringCentralDir:
Georg Brandl4d540882010-10-28 06:42:33 +0000984 raise BadZipFile("Bad magic number for central directory")
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000985 centdir = struct.unpack(structCentralDir, centdir)
986 if self.debug > 2:
Guido van Rossumbe19ed72007-02-09 05:37:30 +0000987 print(centdir)
Fred Drake3e038e52001-02-28 17:56:26 +0000988 filename = fp.read(centdir[_CD_FILENAME_LENGTH])
Martin v. Löwis8570f6a2008-05-05 17:44:38 +0000989 flags = centdir[5]
990 if flags & 0x800:
991 # UTF-8 file names extension
992 filename = filename.decode('utf-8')
993 else:
994 # Historical ZIP filename encoding
995 filename = filename.decode('cp437')
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000996 # Create ZipInfo instance to store file information
Martin v. Löwis8570f6a2008-05-05 17:44:38 +0000997 x = ZipInfo(filename)
Fred Drake3e038e52001-02-28 17:56:26 +0000998 x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
999 x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001000 x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001001 (x.create_version, x.create_system, x.extract_version, x.reserved,
1002 x.flag_bits, x.compress_type, t, d,
1003 x.CRC, x.compress_size, x.file_size) = centdir[1:12]
Martin v. Löwisd099b562012-05-01 14:08:22 +02001004 if x.extract_version > MAX_EXTRACT_VERSION:
1005 raise NotImplementedError("zip file version %.1f" %
1006 (x.extract_version / 10))
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001007 x.volume, x.internal_attr, x.external_attr = centdir[15:18]
1008 # Convert date/time code to (year, month, day, hour, min, sec)
Christian Heimesfdab48e2008-01-20 09:06:41 +00001009 x._raw_time = t
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001010 x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
Fred Drake414ca662000-06-13 18:49:53 +00001011 t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001012
1013 x._decodeExtra()
1014 x.header_offset = x.header_offset + concat
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001015 self.filelist.append(x)
1016 self.NameToInfo[x.filename] = x
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001017
1018 # update total bytes read from central directory
1019 total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH]
1020 + centdir[_CD_EXTRA_FIELD_LENGTH]
1021 + centdir[_CD_COMMENT_LENGTH])
1022
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001023 if self.debug > 2:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001024 print("total", total)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001025
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001026
1027 def namelist(self):
Fred Drake484d7352000-10-02 21:14:52 +00001028 """Return a list of file names in the archive."""
Ezio Melotti006917e2012-04-16 21:34:24 -06001029 return [data.filename for data in self.filelist]
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001030
1031 def infolist(self):
Fred Drake484d7352000-10-02 21:14:52 +00001032 """Return a list of class ZipInfo instances for files in the
1033 archive."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001034 return self.filelist
1035
Guido van Rossumd6ca5462007-05-22 01:29:33 +00001036 def printdir(self, file=None):
Fred Drake484d7352000-10-02 21:14:52 +00001037 """Print a table of contents for the zip file."""
Guido van Rossumd6ca5462007-05-22 01:29:33 +00001038 print("%-46s %19s %12s" % ("File Name", "Modified ", "Size"),
1039 file=file)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001040 for zinfo in self.filelist:
Guido van Rossum7736b5b2008-01-15 21:44:53 +00001041 date = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time[:6]
Guido van Rossumd6ca5462007-05-22 01:29:33 +00001042 print("%-46s %s %12d" % (zinfo.filename, date, zinfo.file_size),
1043 file=file)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001044
1045 def testzip(self):
Fred Drake484d7352000-10-02 21:14:52 +00001046 """Read all the files and check the CRC."""
Benjamin Peterson4cd6a952008-08-17 20:23:46 +00001047 chunk_size = 2 ** 20
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001048 for zinfo in self.filelist:
1049 try:
Benjamin Peterson4cd6a952008-08-17 20:23:46 +00001050 # Read by chunks, to avoid an OverflowError or a
1051 # MemoryError with very large embedded files.
1052 f = self.open(zinfo.filename, "r")
1053 while f.read(chunk_size): # Check CRC-32
1054 pass
Georg Brandl4d540882010-10-28 06:42:33 +00001055 except BadZipFile:
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001056 return zinfo.filename
1057
1058 def getinfo(self, name):
Fred Drake484d7352000-10-02 21:14:52 +00001059 """Return the instance of ZipInfo given 'name'."""
Guido van Rossumb5a755e2007-07-18 18:15:48 +00001060 info = self.NameToInfo.get(name)
1061 if info is None:
1062 raise KeyError(
1063 'There is no item named %r in the archive' % name)
1064
1065 return info
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001066
Thomas Wouterscf297e42007-02-23 15:07:44 +00001067 def setpassword(self, pwd):
1068 """Set default password for encrypted files."""
R. David Murray8d855d82010-12-21 21:53:37 +00001069 if pwd and not isinstance(pwd, bytes):
1070 raise TypeError("pwd: expected bytes, got %s" % type(pwd))
1071 if pwd:
1072 self.pwd = pwd
1073 else:
1074 self.pwd = None
Thomas Wouterscf297e42007-02-23 15:07:44 +00001075
R David Murrayf50b38a2012-04-12 18:44:58 -04001076 @property
1077 def comment(self):
1078 """The comment text associated with the ZIP file."""
1079 return self._comment
1080
1081 @comment.setter
1082 def comment(self, comment):
1083 if not isinstance(comment, bytes):
1084 raise TypeError("comment: expected bytes, got %s" % type(comment))
1085 # check for valid comment length
1086 if len(comment) >= ZIP_MAX_COMMENT:
1087 if self.debug:
1088 print('Archive comment is too long; truncating to %d bytes'
1089 % ZIP_MAX_COMMENT)
1090 comment = comment[:ZIP_MAX_COMMENT]
1091 self._comment = comment
1092 self._didModify = True
1093
Thomas Wouterscf297e42007-02-23 15:07:44 +00001094 def read(self, name, pwd=None):
Fred Drake484d7352000-10-02 21:14:52 +00001095 """Return file bytes (as a string) for name."""
Benjamin Petersond285bdb2010-10-31 17:57:22 +00001096 with self.open(name, "r", pwd) as fp:
1097 return fp.read()
Guido van Rossumd8faa362007-04-27 19:54:29 +00001098
1099 def open(self, name, mode="r", pwd=None):
1100 """Return file-like object for 'name'."""
1101 if mode not in ("r", "U", "rU"):
Collin Winterce36ad82007-08-30 01:19:48 +00001102 raise RuntimeError('open() requires mode "r", "U", or "rU"')
R. David Murray8d855d82010-12-21 21:53:37 +00001103 if pwd and not isinstance(pwd, bytes):
1104 raise TypeError("pwd: expected bytes, got %s" % type(pwd))
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001105 if not self.fp:
Collin Winterce36ad82007-08-30 01:19:48 +00001106 raise RuntimeError(
1107 "Attempt to read ZIP archive that was already closed")
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001108
Guido van Rossumd8faa362007-04-27 19:54:29 +00001109 # Only open a new file for instances where we were not
1110 # given a file object in the constructor
1111 if self._filePassed:
1112 zef_file = self.fp
1113 else:
Guido van Rossumd6ca5462007-05-22 01:29:33 +00001114 zef_file = io.open(self.filename, 'rb')
Guido van Rossumd8faa362007-04-27 19:54:29 +00001115
Georg Brandlb533e262008-05-25 18:19:30 +00001116 # Make sure we have an info object
1117 if isinstance(name, ZipInfo):
1118 # 'name' is already an info object
1119 zinfo = name
1120 else:
1121 # Get info object for name
Łukasz Langaa9f054b2010-11-23 00:15:02 +00001122 try:
1123 zinfo = self.getinfo(name)
1124 except KeyError:
1125 if not self._filePassed:
1126 zef_file.close()
1127 raise
Guido van Rossumd8faa362007-04-27 19:54:29 +00001128 zef_file.seek(zinfo.header_offset, 0)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001129
1130 # Skip the file header:
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001131 fheader = zef_file.read(sizeFileHeader)
Georg Brandl2ee470f2008-07-16 12:55:28 +00001132 if fheader[0:4] != stringFileHeader:
Georg Brandl4d540882010-10-28 06:42:33 +00001133 raise BadZipFile("Bad magic number for file header")
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001134
1135 fheader = struct.unpack(structFileHeader, fheader)
Guido van Rossumd8faa362007-04-27 19:54:29 +00001136 fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001137 if fheader[_FH_EXTRA_FIELD_LENGTH]:
Guido van Rossumd8faa362007-04-27 19:54:29 +00001138 zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001139
Martin v. Löwis2a2ce322012-05-01 08:44:08 +02001140 if zinfo.flag_bits & 0x20:
1141 # Zip 2.7: compressed patched data
1142 raise NotImplementedError("compressed patched data (flag bit 5)")
1143
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +02001144 if zinfo.flag_bits & 0x40:
1145 # strong encryption
1146 raise NotImplementedError("strong encryption (flag bit 6)")
1147
Georg Brandl5ba11de2011-01-01 10:09:32 +00001148 if zinfo.flag_bits & 0x800:
1149 # UTF-8 filename
1150 fname_str = fname.decode("utf-8")
1151 else:
1152 fname_str = fname.decode("cp437")
1153
1154 if fname_str != zinfo.orig_filename:
Łukasz Langaa9f054b2010-11-23 00:15:02 +00001155 if not self._filePassed:
1156 zef_file.close()
Georg Brandl4d540882010-10-28 06:42:33 +00001157 raise BadZipFile(
Collin Winterce36ad82007-08-30 01:19:48 +00001158 'File name in directory %r and header %r differ.'
1159 % (zinfo.orig_filename, fname))
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001160
Guido van Rossumd8faa362007-04-27 19:54:29 +00001161 # check for encrypted flag & handle password
1162 is_encrypted = zinfo.flag_bits & 0x1
1163 zd = None
Thomas Wouterscf297e42007-02-23 15:07:44 +00001164 if is_encrypted:
Guido van Rossumd8faa362007-04-27 19:54:29 +00001165 if not pwd:
1166 pwd = self.pwd
1167 if not pwd:
Łukasz Langaa9f054b2010-11-23 00:15:02 +00001168 if not self._filePassed:
1169 zef_file.close()
Collin Winterce36ad82007-08-30 01:19:48 +00001170 raise RuntimeError("File %s is encrypted, "
1171 "password required for extraction" % name)
Guido van Rossumd8faa362007-04-27 19:54:29 +00001172
Thomas Wouterscf297e42007-02-23 15:07:44 +00001173 zd = _ZipDecrypter(pwd)
1174 # The first 12 bytes in the cypher stream is an encryption header
1175 # used to strengthen the algorithm. The first 11 bytes are
1176 # completely random, while the 12th contains the MSB of the CRC,
Christian Heimesfdab48e2008-01-20 09:06:41 +00001177 # or the MSB of the file time depending on the header type
Thomas Wouterscf297e42007-02-23 15:07:44 +00001178 # and is used to check the correctness of the password.
R. David Murray8d855d82010-12-21 21:53:37 +00001179 header = zef_file.read(12)
1180 h = list(map(zd, header[0:12]))
Christian Heimesfdab48e2008-01-20 09:06:41 +00001181 if zinfo.flag_bits & 0x8:
1182 # compare against the file type from extended local headers
1183 check_byte = (zinfo._raw_time >> 8) & 0xff
1184 else:
1185 # compare against the CRC otherwise
1186 check_byte = (zinfo.CRC >> 24) & 0xff
1187 if h[11] != check_byte:
Łukasz Langaa9f054b2010-11-23 00:15:02 +00001188 if not self._filePassed:
1189 zef_file.close()
Christian Heimesfdab48e2008-01-20 09:06:41 +00001190 raise RuntimeError("Bad password for file", name)
Guido van Rossumd8faa362007-04-27 19:54:29 +00001191
Łukasz Langaa9f054b2010-11-23 00:15:02 +00001192 return ZipExtFile(zef_file, mode, zinfo, zd,
1193 close_fileobj=not self._filePassed)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001194
Christian Heimes790c8232008-01-07 21:14:23 +00001195 def extract(self, member, path=None, pwd=None):
1196 """Extract a member from the archive to the current working directory,
1197 using its full name. Its file information is extracted as accurately
1198 as possible. `member' may be a filename or a ZipInfo object. You can
1199 specify a different directory using `path'.
1200 """
1201 if not isinstance(member, ZipInfo):
1202 member = self.getinfo(member)
1203
1204 if path is None:
1205 path = os.getcwd()
1206
1207 return self._extract_member(member, path, pwd)
1208
1209 def extractall(self, path=None, members=None, pwd=None):
1210 """Extract all members from the archive to the current working
1211 directory. `path' specifies a different directory to extract to.
1212 `members' is optional and must be a subset of the list returned
1213 by namelist().
1214 """
1215 if members is None:
1216 members = self.namelist()
1217
1218 for zipinfo in members:
1219 self.extract(zipinfo, path, pwd)
1220
1221 def _extract_member(self, member, targetpath, pwd):
1222 """Extract the ZipInfo object 'member' to a physical
1223 file on the path targetpath.
1224 """
1225 # build the destination pathname, replacing
1226 # forward slashes to platform specific separators.
Antoine Pitrou3c33e082009-05-04 21:21:36 +00001227 # Strip trailing path separator, unless it represents the root.
1228 if (targetpath[-1:] in (os.path.sep, os.path.altsep)
1229 and len(os.path.splitdrive(targetpath)[1]) > 1):
Christian Heimes790c8232008-01-07 21:14:23 +00001230 targetpath = targetpath[:-1]
1231
1232 # don't include leading "/" from file name if present
Martin v. Löwis59e47792009-01-24 14:10:07 +00001233 if member.filename[0] == '/':
Christian Heimes790c8232008-01-07 21:14:23 +00001234 targetpath = os.path.join(targetpath, member.filename[1:])
1235 else:
1236 targetpath = os.path.join(targetpath, member.filename)
1237
1238 targetpath = os.path.normpath(targetpath)
1239
1240 # Create all upper directories if necessary.
1241 upperdirs = os.path.dirname(targetpath)
1242 if upperdirs and not os.path.exists(upperdirs):
1243 os.makedirs(upperdirs)
1244
Martin v. Löwis59e47792009-01-24 14:10:07 +00001245 if member.filename[-1] == '/':
Martin v. Löwis70ccd162009-05-24 19:47:22 +00001246 if not os.path.isdir(targetpath):
1247 os.mkdir(targetpath)
Martin v. Löwis59e47792009-01-24 14:10:07 +00001248 return targetpath
1249
Georg Brandlb533e262008-05-25 18:19:30 +00001250 source = self.open(member, pwd=pwd)
Christian Heimes790c8232008-01-07 21:14:23 +00001251 target = open(targetpath, "wb")
1252 shutil.copyfileobj(source, target)
1253 source.close()
1254 target.close()
1255
1256 return targetpath
1257
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001258 def _writecheck(self, zinfo):
Fred Drake484d7352000-10-02 21:14:52 +00001259 """Check for errors before writing a file to the archive."""
Raymond Hettinger54f02222002-06-01 14:18:47 +00001260 if zinfo.filename in self.NameToInfo:
Tim Peterse1190062001-01-15 03:34:38 +00001261 if self.debug: # Warning for duplicate names
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001262 print("Duplicate name:", zinfo.filename)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001263 if self.mode not in ("w", "a"):
Collin Winterce36ad82007-08-30 01:19:48 +00001264 raise RuntimeError('write() requires mode "w" or "a"')
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001265 if not self.fp:
Collin Winterce36ad82007-08-30 01:19:48 +00001266 raise RuntimeError(
1267 "Attempt to write ZIP archive that was already closed")
Martin v. Löwisf6b16a42012-05-01 07:58:44 +02001268 _check_compression(zinfo.compress_type)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001269 if zinfo.file_size > ZIP64_LIMIT:
1270 if not self._allowZip64:
1271 raise LargeZipFile("Filesize would require ZIP64 extensions")
1272 if zinfo.header_offset > ZIP64_LIMIT:
1273 if not self._allowZip64:
Collin Winterce36ad82007-08-30 01:19:48 +00001274 raise LargeZipFile(
1275 "Zipfile size would require ZIP64 extensions")
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001276
1277 def write(self, filename, arcname=None, compress_type=None):
Fred Drake484d7352000-10-02 21:14:52 +00001278 """Put the bytes from filename into the archive under the name
1279 arcname."""
Guido van Rossumb5a755e2007-07-18 18:15:48 +00001280 if not self.fp:
1281 raise RuntimeError(
1282 "Attempt to write to ZIP archive that was already closed")
1283
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001284 st = os.stat(filename)
Martin v. Löwis59e47792009-01-24 14:10:07 +00001285 isdir = stat.S_ISDIR(st.st_mode)
Raymond Hettinger32200ae2002-06-01 19:51:15 +00001286 mtime = time.localtime(st.st_mtime)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001287 date_time = mtime[0:6]
1288 # Create ZipInfo instance to store file information
1289 if arcname is None:
Georg Brandl8f7c54e2006-02-20 08:40:38 +00001290 arcname = filename
1291 arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
1292 while arcname[0] in (os.sep, os.altsep):
1293 arcname = arcname[1:]
Martin v. Löwis59e47792009-01-24 14:10:07 +00001294 if isdir:
1295 arcname += '/'
Georg Brandl8f7c54e2006-02-20 08:40:38 +00001296 zinfo = ZipInfo(arcname, date_time)
Guido van Rossume2a383d2007-01-15 16:59:06 +00001297 zinfo.external_attr = (st[0] & 0xFFFF) << 16 # Unix attributes
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001298 if compress_type is None:
Tim Peterse1190062001-01-15 03:34:38 +00001299 zinfo.compress_type = self.compression
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001300 else:
Tim Peterse1190062001-01-15 03:34:38 +00001301 zinfo.compress_type = compress_type
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001302
1303 zinfo.file_size = st.st_size
Finn Bock03a3bb82001-09-05 18:40:33 +00001304 zinfo.flag_bits = 0x00
Tim Peterse1190062001-01-15 03:34:38 +00001305 zinfo.header_offset = self.fp.tell() # Start of header bytes
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +02001306 if zinfo.compress_type == ZIP_LZMA:
1307 # Compressed data includes an end-of-stream (EOS) marker
1308 zinfo.flag_bits |= 0x02
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001309
1310 self._writecheck(zinfo)
1311 self._didModify = True
Martin v. Löwis59e47792009-01-24 14:10:07 +00001312
1313 if isdir:
1314 zinfo.file_size = 0
1315 zinfo.compress_size = 0
1316 zinfo.CRC = 0
1317 self.filelist.append(zinfo)
1318 self.NameToInfo[zinfo.filename] = zinfo
1319 self.fp.write(zinfo.FileHeader())
1320 return
1321
Martin v. Löwisf6b16a42012-05-01 07:58:44 +02001322 cmpr = _get_compressor(zinfo.compress_type)
Benjamin Petersonfa0d7032009-06-01 22:42:33 +00001323 with open(filename, "rb") as fp:
1324 # Must overwrite CRC and sizes with correct data later
1325 zinfo.CRC = CRC = 0
1326 zinfo.compress_size = compress_size = 0
1327 zinfo.file_size = file_size = 0
1328 self.fp.write(zinfo.FileHeader())
Benjamin Petersonfa0d7032009-06-01 22:42:33 +00001329 while 1:
1330 buf = fp.read(1024 * 8)
1331 if not buf:
1332 break
1333 file_size = file_size + len(buf)
1334 CRC = crc32(buf, CRC) & 0xffffffff
1335 if cmpr:
1336 buf = cmpr.compress(buf)
1337 compress_size = compress_size + len(buf)
1338 self.fp.write(buf)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001339 if cmpr:
1340 buf = cmpr.flush()
1341 compress_size = compress_size + len(buf)
1342 self.fp.write(buf)
1343 zinfo.compress_size = compress_size
1344 else:
1345 zinfo.compress_size = file_size
1346 zinfo.CRC = CRC
1347 zinfo.file_size = file_size
Finn Bock03a3bb82001-09-05 18:40:33 +00001348 # Seek backwards and write CRC and file sizes
Tim Petersb64bec32001-09-18 02:26:39 +00001349 position = self.fp.tell() # Preserve current position in file
Finn Bock03a3bb82001-09-05 18:40:33 +00001350 self.fp.seek(zinfo.header_offset + 14, 0)
Christian Heimesd5e2b6f2008-03-19 21:50:51 +00001351 self.fp.write(struct.pack("<LLL", zinfo.CRC, zinfo.compress_size,
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001352 zinfo.file_size))
Finn Bock03a3bb82001-09-05 18:40:33 +00001353 self.fp.seek(position, 0)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001354 self.filelist.append(zinfo)
1355 self.NameToInfo[zinfo.filename] = zinfo
1356
def writestr(self, zinfo_or_arcname, data, compress_type=None):
    """Write a file into the archive from an in-memory buffer.

    'data' may be either a 'str' or a 'bytes' instance; if it is a
    'str', it is encoded as UTF-8 first.

    'zinfo_or_arcname' is either a ZipInfo instance or the name of the
    file in the archive.  When a plain name is given, a new ZipInfo is
    created with the current local time, the archive's default
    compression and external attributes of ``0o600 << 16``.

    'compress_type', when not None, overrides the compression method of
    the entry (including the one carried by a passed-in ZipInfo).

    Raises RuntimeError if the archive has already been closed.
    """
    if isinstance(data, str):
        data = data.encode("utf-8")
    if not isinstance(zinfo_or_arcname, ZipInfo):
        zinfo = ZipInfo(filename=zinfo_or_arcname,
                        date_time=time.localtime(time.time())[:6])
        zinfo.compress_type = self.compression
        zinfo.external_attr = 0o600 << 16
    else:
        zinfo = zinfo_or_arcname

    if not self.fp:
        raise RuntimeError(
            "Attempt to write to ZIP archive that was already closed")

    zinfo.file_size = len(data)             # Uncompressed size
    # Nothing is written to self.fp before the header below, so the
    # offset only needs to be recorded once.
    zinfo.header_offset = self.fp.tell()    # Start of header data
    if compress_type is not None:
        zinfo.compress_type = compress_type
    if zinfo.compress_type == ZIP_LZMA:
        # Compressed data includes an end-of-stream (EOS) marker
        zinfo.flag_bits |= 0x02

    self._writecheck(zinfo)
    self._didModify = True
    zinfo.CRC = crc32(data) & 0xffffffff    # CRC-32 checksum
    co = _get_compressor(zinfo.compress_type)
    if co:
        data = co.compress(data) + co.flush()
        zinfo.compress_size = len(data)     # Compressed size
    else:
        zinfo.compress_size = zinfo.file_size

    self.fp.write(zinfo.FileHeader())
    self.fp.write(data)
    if zinfo.flag_bits & 0x08:
        # Write CRC and file sizes after the file data
        self.fp.write(struct.pack("<LLL", zinfo.CRC, zinfo.compress_size,
                                  zinfo.file_size))
    # Flush only after the last write for this member so the trailing
    # descriptor (when present) is not left sitting in the buffer.
    self.fp.flush()
    self.filelist.append(zinfo)
    self.NameToInfo[zinfo.filename] = zinfo
1404
def __del__(self):
    """Finalizer: invoke close() in case the caller never did."""
    self.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001408
def close(self):
    """Close the file, and for mode "w" and "a" write the ending
    records.

    Calling close() on an already closed archive is a no-op.  When the
    archive was opened for writing/appending and has been modified, the
    central directory, the ZIP64 end-of-archive records (only when the
    entry count, directory size or directory offset exceed the classic
    limits) and the end-of-archive record are written first.

    The underlying file object is released even if writing the ending
    records fails: it is closed unless it was supplied by the caller
    (self._filePassed), and self.fp is always reset to None so that a
    later close()/__del__ does not retry the write.
    """
    if self.fp is None:
        return

    try:
        if self.mode in ("w", "a") and self._didModify:  # write ending records
            pos1 = self.fp.tell()
            count = 0
            for zinfo in self.filelist:         # write central directory
                count = count + 1
                dt = zinfo.date_time
                # Pack the timestamp into MS-DOS date/time words
                # (seconds are stored with 2-second resolution).
                dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
                dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)

                # Values too large for the 32-bit fields are moved into
                # a ZIP64 extra field and replaced by 0xffffffff.
                extra = []
                if zinfo.file_size > ZIP64_LIMIT \
                   or zinfo.compress_size > ZIP64_LIMIT:
                    extra.append(zinfo.file_size)
                    extra.append(zinfo.compress_size)
                    file_size = 0xffffffff
                    compress_size = 0xffffffff
                else:
                    file_size = zinfo.file_size
                    compress_size = zinfo.compress_size

                if zinfo.header_offset > ZIP64_LIMIT:
                    extra.append(zinfo.header_offset)
                    header_offset = 0xffffffff
                else:
                    header_offset = zinfo.header_offset

                extra_data = zinfo.extra
                min_version = 0
                if extra:
                    # Append a ZIP64 field to the extra's
                    extra_data = struct.pack(
                        '<HH' + 'Q'*len(extra),
                        1, 8*len(extra), *extra) + extra_data

                    min_version = ZIP64_VERSION

                if zinfo.compress_type == ZIP_BZIP2:
                    min_version = max(BZIP2_VERSION, min_version)
                elif zinfo.compress_type == ZIP_LZMA:
                    min_version = max(LZMA_VERSION, min_version)

                extract_version = max(min_version, zinfo.extract_version)
                create_version = max(min_version, zinfo.create_version)
                try:
                    filename, flag_bits = zinfo._encodeFilenameFlags()
                    centdir = struct.pack(structCentralDir,
                        stringCentralDir, create_version,
                        zinfo.create_system, extract_version, zinfo.reserved,
                        flag_bits, zinfo.compress_type, dostime, dosdate,
                        zinfo.CRC, compress_size, file_size,
                        len(filename), len(extra_data), len(zinfo.comment),
                        0, zinfo.internal_attr, zinfo.external_attr,
                        header_offset)
                except DeprecationWarning:
                    # Only reachable when warnings are turned into
                    # errors: dump the offending values, then re-raise.
                    print((structCentralDir, stringCentralDir, create_version,
                        zinfo.create_system, extract_version, zinfo.reserved,
                        zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
                        zinfo.CRC, compress_size, file_size,
                        len(zinfo.filename), len(extra_data), len(zinfo.comment),
                        0, zinfo.internal_attr, zinfo.external_attr,
                        header_offset), file=sys.stderr)
                    raise
                self.fp.write(centdir)
                self.fp.write(filename)
                self.fp.write(extra_data)
                self.fp.write(zinfo.comment)

            pos2 = self.fp.tell()
            # Write end-of-zip-archive record
            centDirCount = count
            centDirSize = pos2 - pos1
            centDirOffset = pos1
            if (centDirCount >= ZIP_FILECOUNT_LIMIT or
                centDirOffset > ZIP64_LIMIT or
                centDirSize > ZIP64_LIMIT):
                # Need to write the ZIP64 end-of-archive records
                zip64endrec = struct.pack(
                        structEndArchive64, stringEndArchive64,
                        44, 45, 45, 0, 0, centDirCount, centDirCount,
                        centDirSize, centDirOffset)
                self.fp.write(zip64endrec)

                zip64locrec = struct.pack(
                        structEndArchive64Locator,
                        stringEndArchive64Locator, 0, pos2, 1)
                self.fp.write(zip64locrec)
                # Clamp the values stored in the classic record.
                centDirCount = min(centDirCount, 0xFFFF)
                centDirSize = min(centDirSize, 0xFFFFFFFF)
                centDirOffset = min(centDirOffset, 0xFFFFFFFF)

            endrec = struct.pack(structEndArchive, stringEndArchive,
                                 0, 0, centDirCount, centDirCount,
                                 centDirSize, centDirOffset, len(self._comment))
            self.fp.write(endrec)
            self.fp.write(self._comment)
            self.fp.flush()
    finally:
        # Always drop our reference, and close the file if we own it,
        # even when writing the ending records raised.
        fp = self.fp
        self.fp = None
        if not self._filePassed:
            fp.close()
1514
1515
class PyZipFile(ZipFile):
    """Class to create ZIP archives with Python library files and packages."""

    def __init__(self, file, mode="r", compression=ZIP_STORED,
                 allowZip64=False, optimize=-1):
        """Open the archive like ZipFile and remember the optimization level.

        'optimize' controls which compiled file writepy() stores:
        -1 (the default) selects the legacy behaviour of using whichever
        up-to-date compiled file already exists; any other value is the
        optimization level handed to py_compile.compile().
        """
        ZipFile.__init__(self, file, mode=mode, compression=compression,
                         allowZip64=allowZip64)
        self._optimize = optimize

    def writepy(self, pathname, basename=""):
        """Add all files from "pathname" to the ZIP archive.

        If pathname is a package directory, search the directory and
        all package subdirectories recursively for all *.py and enter
        the modules into the archive.  If pathname is a plain
        directory, listdir *.py and enter all modules.  Else, pathname
        must be a Python *.py file and the module will be put into the
        archive.  Added modules are always module.pyo or module.pyc.
        This method will compile the module.py into module.pyc if
        necessary.

        'basename' is the archive path prefix under which the modules
        are stored.  Raises RuntimeError if pathname is a file that
        does not end with ".py".
        """
        name = os.path.split(pathname)[1]
        if os.path.isdir(pathname):
            initname = os.path.join(pathname, "__init__.py")
            if os.path.isfile(initname):
                # This is a package directory, add it
                if basename:
                    basename = "%s/%s" % (basename, name)
                else:
                    basename = name
                if self.debug:
                    print("Adding package in", pathname, "as", basename)
                fname, arcname = self._get_codename(initname[0:-3], basename)
                if self.debug:
                    print("Adding", arcname)
                self.write(fname, arcname)
                dirlist = os.listdir(pathname)
                # __init__.py was handled above; isfile() guarantees it
                # is present in the listing.
                dirlist.remove("__init__.py")
                # Add all *.py files and package subdirectories
                for filename in dirlist:
                    path = os.path.join(pathname, filename)
                    ext = os.path.splitext(filename)[1]
                    if os.path.isdir(path):
                        if os.path.isfile(os.path.join(path, "__init__.py")):
                            # This is a package directory, add it
                            self.writepy(path, basename)  # Recursive call
                    elif ext == ".py":
                        fname, arcname = self._get_codename(path[0:-3],
                                                            basename)
                        if self.debug:
                            print("Adding", arcname)
                        self.write(fname, arcname)
            else:
                # This is NOT a package directory, add its files at top level
                if self.debug:
                    print("Adding files from directory", pathname)
                for filename in os.listdir(pathname):
                    path = os.path.join(pathname, filename)
                    ext = os.path.splitext(filename)[1]
                    if ext == ".py":
                        fname, arcname = self._get_codename(path[0:-3],
                                                            basename)
                        if self.debug:
                            print("Adding", arcname)
                        self.write(fname, arcname)
        else:
            if pathname[-3:] != ".py":
                raise RuntimeError(
                    'Files added with writepy() must end with ".py"')
            fname, arcname = self._get_codename(pathname[0:-3], basename)
            if self.debug:
                print("Adding file", arcname)
            self.write(fname, arcname)

    def _get_codename(self, pathname, basename):
        """Return (filename, archivename) for the path.

        Given a module name path (without the ".py" suffix), return the
        file to read from disk and the name to store it under in the
        archive, compiling if necessary.  For example, given
        /python/lib/string, return (/python/lib/string.pyc, string.pyc);
        the archive name is prefixed with "basename/" when basename is
        non-empty.  If compilation fails, the plain .py source is used
        for both.
        """
        def _compile(file, optimize=-1):
            # Byte-compile 'file'; report (don't raise) compile errors
            # and tell the caller whether a compiled file was produced.
            import py_compile
            if self.debug:
                print("Compiling", file)
            try:
                py_compile.compile(file, doraise=True, optimize=optimize)
            except py_compile.PyCompileError as err:
                print(err.msg)
                return False
            return True

        file_py = pathname + ".py"
        file_pyc = pathname + ".pyc"
        file_pyo = pathname + ".pyo"
        pycache_pyc = imp.cache_from_source(file_py, True)
        pycache_pyo = imp.cache_from_source(file_py, False)
        if self._optimize == -1:
            # legacy mode: use whatever file is present
            if (os.path.isfile(file_pyo) and
                os.stat(file_pyo).st_mtime >= os.stat(file_py).st_mtime):
                # Use .pyo file.
                arcname = fname = file_pyo
            elif (os.path.isfile(file_pyc) and
                  os.stat(file_pyc).st_mtime >= os.stat(file_py).st_mtime):
                # Use .pyc file.
                arcname = fname = file_pyc
            elif (os.path.isfile(pycache_pyc) and
                  os.stat(pycache_pyc).st_mtime >= os.stat(file_py).st_mtime):
                # Use the __pycache__/*.pyc file, but write it to the legacy pyc
                # file name in the archive.
                fname = pycache_pyc
                arcname = file_pyc
            elif (os.path.isfile(pycache_pyo) and
                  os.stat(pycache_pyo).st_mtime >= os.stat(file_py).st_mtime):
                # Use the __pycache__/*.pyo file, but write it to the legacy pyo
                # file name in the archive.
                fname = pycache_pyo
                arcname = file_pyo
            else:
                # Compile py into PEP 3147 pyc file.
                if _compile(file_py):
                    fname = (pycache_pyc if __debug__ else pycache_pyo)
                    arcname = (file_pyc if __debug__ else file_pyo)
                else:
                    fname = arcname = file_py
        else:
            # new mode: use given optimization level
            if self._optimize == 0:
                fname = pycache_pyc
                arcname = file_pyc
            else:
                fname = pycache_pyo
                arcname = file_pyo
            if not (os.path.isfile(fname) and
                    os.stat(fname).st_mtime >= os.stat(file_py).st_mtime):
                if not _compile(file_py, optimize=self._optimize):
                    fname = arcname = file_py
        archivename = os.path.split(arcname)[1]
        if basename:
            archivename = "%s/%s" % (basename, archivename)
        return (fname, archivename)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001658
1659
def main(args = None):
    """Command-line entry point: list, test, extract or create a zipfile.

    'args' is the argument list (defaults to sys.argv[1:]).  The first
    argument selects the action (-l, -t, -e or -c); on a missing or
    unknown action, or a wrong number of arguments, the usage text is
    printed and the process exits with status 1.
    """
    import textwrap
    USAGE=textwrap.dedent("""\
        Usage:
            zipfile.py -l zipfile.zip        # Show listing of a zipfile
            zipfile.py -t zipfile.zip        # Test if a zipfile is valid
            zipfile.py -e zipfile.zip target # Extract zipfile into target dir
            zipfile.py -c zipfile.zip src ... # Create zipfile from sources
        """)
    if args is None:
        args = sys.argv[1:]

    if not args or args[0] not in ('-l', '-c', '-e', '-t'):
        print(USAGE)
        sys.exit(1)

    if args[0] == '-l':
        if len(args) != 2:
            print(USAGE)
            sys.exit(1)
        zf = ZipFile(args[1], 'r')
        try:
            zf.printdir()
        finally:
            zf.close()

    elif args[0] == '-t':
        if len(args) != 2:
            print(USAGE)
            sys.exit(1)
        zf = ZipFile(args[1], 'r')
        try:
            badfile = zf.testzip()
        finally:
            # The archive was previously left open on this path.
            zf.close()
        if badfile:
            print("The following enclosed file is corrupted: {!r}".format(badfile))
        print("Done testing")

    elif args[0] == '-e':
        if len(args) != 3:
            print(USAGE)
            sys.exit(1)

        zf = ZipFile(args[1], 'r')
        try:
            out = args[2]
            # NOTE(review): member names are joined onto 'out' without
            # sanitising; only use this on archives you trust.
            for path in zf.namelist():
                if path.startswith('./'):
                    tgt = os.path.join(out, path[2:])
                else:
                    tgt = os.path.join(out, path)

                tgtdir = os.path.dirname(tgt)
                # An empty tgtdir means "current directory"; there is
                # nothing to create (and makedirs('') would fail).
                if tgtdir and not os.path.exists(tgtdir):
                    os.makedirs(tgtdir)
                if path.endswith('/'):
                    # Directory entry: the directory itself was created
                    # above and there is no file data to write.
                    continue
                with open(tgt, 'wb') as fp:
                    fp.write(zf.read(path))
        finally:
            zf.close()

    elif args[0] == '-c':
        if len(args) < 3:
            print(USAGE)
            sys.exit(1)

        def addToZip(zf, path, zippath):
            # Recursively add a file or a directory tree to the archive.
            if os.path.isfile(path):
                zf.write(path, zippath, ZIP_DEFLATED)
            elif os.path.isdir(path):
                for nm in os.listdir(path):
                    addToZip(zf,
                            os.path.join(path, nm), os.path.join(zippath, nm))
            # else: ignore

        zf = ZipFile(args[1], 'w', allowZip64=True)
        try:
            for src in args[2:]:
                addToZip(zf, src, os.path.basename(src))
        finally:
            zf.close()

if __name__ == "__main__":
    main()