blob: a6c07e61c5d5fc892990e1645301696710a51c85 [file] [log] [blame]
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001"""
2Read and write ZIP files.
Guido van Rossumd6ca5462007-05-22 01:29:33 +00003
4XXX references to utf-8 need further investigation.
Thomas Wouters0e3f5912006-08-11 14:57:12 +00005"""
Antoine Pitroua32f9a22010-01-27 21:18:57 +00006import io
Barry Warsaw28a691b2010-04-17 00:19:56 +00007import os
Antoine Pitroua32f9a22010-01-27 21:18:57 +00008import re
Barry Warsaw28a691b2010-04-17 00:19:56 +00009import imp
10import sys
11import time
12import stat
13import shutil
14import struct
15import binascii
16
Guido van Rossum32abe6f2000-03-31 17:30:02 +000017
18try:
Tim Peterse1190062001-01-15 03:34:38 +000019 import zlib # We may need its compression method
Christian Heimesd5e2b6f2008-03-19 21:50:51 +000020 crc32 = zlib.crc32
Guido van Rossum9c673f32001-04-10 15:37:12 +000021except ImportError:
Guido van Rossum32abe6f2000-03-31 17:30:02 +000022 zlib = None
Christian Heimesd5e2b6f2008-03-19 21:50:51 +000023 crc32 = binascii.crc32
Guido van Rossum32abe6f2000-03-31 17:30:02 +000024
Martin v. Löwisf6b16a42012-05-01 07:58:44 +020025try:
26 import bz2 # We may need its compression method
27except ImportError:
28 bz2 = None
29
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +020030try:
31 import lzma # We may need its compression method
32except ImportError:
33 lzma = None
34
Martin v. Löwisf6b16a42012-05-01 07:58:44 +020035__all__ = ["BadZipFile", "BadZipfile", "error",
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +020036 "ZIP_STORED", "ZIP_DEFLATED", "ZIP_BZIP2", "ZIP_LZMA",
Georg Brandl4d540882010-10-28 06:42:33 +000037 "is_zipfile", "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile"]
Skip Montanaro40fc1602001-03-01 04:27:19 +000038
class BadZipFile(Exception):
    """Error raised for ZIP archives this module cannot process."""


class LargeZipFile(Exception):
    """
    Raised while writing an archive that needs the ZIP64 extensions
    when those extensions are disabled.
    """


# Pre-3.2 compatibility names
BadZipfile = BadZipFile
error = BadZipFile
50
Guido van Rossum32abe6f2000-03-31 17:30:02 +000051
# Sizes above ZIP64_LIMIT make ZipInfo.FileHeader() switch to ZIP64 fields.
ZIP64_LIMIT = 0x7FFFFFFF
ZIP_FILECOUNT_LIMIT = 0x10000
ZIP_MAX_COMMENT = 0xFFFF

# constants for Zip file compression methods
# (other ZIP compression methods are not supported)
ZIP_STORED, ZIP_DEFLATED, ZIP_BZIP2, ZIP_LZMA = 0, 8, 12, 14

# "version needed to extract" values written into headers
DEFAULT_VERSION, ZIP64_VERSION, BZIP2_VERSION, LZMA_VERSION = 20, 45, 46, 63
# we recognize (but not necessarily support) all features up to that version
MAX_EXTRACT_VERSION = 63
Martin v. Löwisf6b16a42012-05-01 07:58:44 +020069
Martin v. Löwisb09b8442008-07-03 14:13:42 +000070# Below are some formats and associated data for reading/writing headers using
71# the struct module. The names and structures of headers/records are those used
72# in the PKWARE description of the ZIP file format:
73# http://www.pkware.com/documents/casestudies/APPNOTE.TXT
74# (URL valid as of January 2008)
Thomas Wouters0e3f5912006-08-11 14:57:12 +000075
# The "end of central directory" structure, magic number, size, and indices
# (section V.I in the format document)
structEndArchive = b"<4s4H2LH"
stringEndArchive = b"PK\005\006"
sizeEndCentDir = struct.calcsize(structEndArchive)

# Positions of the fields inside an unpacked end-of-central-directory record.
# The last two (_ECD_COMMENT, _ECD_LOCATION) are not part of the structure
# as defined in the spec; this module appends them to the unpacked list as
# a convenience.
(_ECD_SIGNATURE,
 _ECD_DISK_NUMBER,
 _ECD_DISK_START,
 _ECD_ENTRIES_THIS_DISK,
 _ECD_ENTRIES_TOTAL,
 _ECD_SIZE,
 _ECD_OFFSET,
 _ECD_COMMENT_SIZE,
 _ECD_COMMENT,
 _ECD_LOCATION) = range(10)
94
# The "central directory" structure, magic number, size, and indices
# of entries in the structure (section V.F in the format document)
structCentralDir = "<4s4B4HL2L5H2L"
stringCentralDir = b"PK\001\002"
sizeCentralDir = struct.calcsize(structCentralDir)

# indexes of entries in the central directory structure
(_CD_SIGNATURE,
 _CD_CREATE_VERSION,
 _CD_CREATE_SYSTEM,
 _CD_EXTRACT_VERSION,
 _CD_EXTRACT_SYSTEM,
 _CD_FLAG_BITS,
 _CD_COMPRESS_TYPE,
 _CD_TIME,
 _CD_DATE,
 _CD_CRC,
 _CD_COMPRESSED_SIZE,
 _CD_UNCOMPRESSED_SIZE,
 _CD_FILENAME_LENGTH,
 _CD_EXTRA_FIELD_LENGTH,
 _CD_COMMENT_LENGTH,
 _CD_DISK_NUMBER_START,
 _CD_INTERNAL_FILE_ATTRIBUTES,
 _CD_EXTERNAL_FILE_ATTRIBUTES,
 _CD_LOCAL_HEADER_OFFSET) = range(19)
121
# The "local file header" structure, magic number, size, and indices
# (section V.A in the format document)
structFileHeader = "<4s2B4HL2L2H"
stringFileHeader = b"PK\003\004"
sizeFileHeader = struct.calcsize(structFileHeader)

# Positions of the fields inside an unpacked local file header.
(_FH_SIGNATURE,
 _FH_EXTRACT_VERSION,
 _FH_EXTRACT_SYSTEM,
 _FH_GENERAL_PURPOSE_FLAG_BITS,
 _FH_COMPRESSION_METHOD,
 _FH_LAST_MOD_TIME,
 _FH_LAST_MOD_DATE,
 _FH_CRC,
 _FH_COMPRESSED_SIZE,
 _FH_UNCOMPRESSED_SIZE,
 _FH_FILENAME_LENGTH,
 _FH_EXTRA_FIELD_LENGTH) = range(12)
140
# The "Zip64 end of central directory locator" structure, magic number, and size
structEndArchive64Locator = "<4sLQL"
stringEndArchive64Locator = b"PK\x06\x07"
sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator)

# The "Zip64 end of central directory" record, magic number, size, and indices
# (section V.G in the format document)
structEndArchive64 = "<4sQ2H2L4Q"
stringEndArchive64 = b"PK\x06\x06"
sizeEndCentDir64 = struct.calcsize(structEndArchive64)

# Positions of the fields inside an unpacked ZIP64 end-of-central-directory
# record.
(_CD64_SIGNATURE,
 _CD64_DIRECTORY_RECSIZE,
 _CD64_CREATE_VERSION,
 _CD64_EXTRACT_VERSION,
 _CD64_DISK_NUMBER,
 _CD64_DISK_NUMBER_START,
 _CD64_NUMBER_ENTRIES_THIS_DISK,
 _CD64_NUMBER_ENTRIES_TOTAL,
 _CD64_DIRECTORY_SIZE,
 _CD64_OFFSET_START_CENTDIR) = range(10)
162
def _check_zipfile(fp):
    """Return True if an end-of-central-directory record is found in *fp*.

    An IOError raised while probing the file is treated as "not a ZIP file".
    """
    try:
        found = bool(_EndRecData(fp))   # file has correct magic number
    except IOError:
        found = False
    return found
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000170
def is_zipfile(filename):
    """Quickly test whether *filename* is a ZIP file via its magic number.

    *filename* may be a path, or any object with a ``read`` attribute, which
    is then used directly as the file object.  Returns False when the file
    cannot be opened or read (IOError).
    """
    is_zip = False
    try:
        if not hasattr(filename, "read"):
            with open(filename, "rb") as stream:
                is_zip = _check_zipfile(stream)
        else:
            is_zip = _check_zipfile(fp=filename)
    except IOError:
        pass
    return is_zip
186
def _EndRecData64(fpin, offset, endrec):
    """
    Read the ZIP64 end-of-archive records and use that to update endrec.

    fpin   -- seekable binary file object positioned anywhere.
    offset -- position of the classic end-of-central-directory record,
              expressed relative to the end of the file (it is passed to
              seek() with whence=2).
    endrec -- list produced by _EndRecData(); updated in place.

    Returns endrec, unchanged when no complete ZIP64 locator/record precedes
    the classic record.  Raises BadZipFile for multi-disk archives.
    """
    try:
        fpin.seek(offset - sizeEndCentDir64Locator, 2)
    except IOError:
        # If the seek fails, the file is not large enough to contain a ZIP64
        # end-of-archive record, so just return the end record we were given.
        return endrec

    data = fpin.read(sizeEndCentDir64Locator)
    if len(data) != sizeEndCentDir64Locator:
        # Short read: there is no room for a locator, so this cannot be a
        # ZIP64 archive.  Without this guard struct.unpack() would raise.
        return endrec
    sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, data)
    if sig != stringEndArchive64Locator:
        return endrec

    if diskno != 0 or disks != 1:
        raise BadZipFile("zipfiles that span multiple disks are not supported")

    # Assume no 'zip64 extensible data'
    fpin.seek(offset - sizeEndCentDir64Locator - sizeEndCentDir64, 2)
    data = fpin.read(sizeEndCentDir64)
    if len(data) != sizeEndCentDir64:
        # Truncated ZIP64 record; keep the classic values.
        return endrec
    sig, sz, create_version, read_version, disk_num, disk_dir, \
        dircount, dircount2, dirsize, diroffset = \
        struct.unpack(structEndArchive64, data)
    if sig != stringEndArchive64:
        return endrec

    # Update the original endrec using data from the ZIP64 record
    endrec[_ECD_SIGNATURE] = sig
    endrec[_ECD_DISK_NUMBER] = disk_num
    endrec[_ECD_DISK_START] = disk_dir
    endrec[_ECD_ENTRIES_THIS_DISK] = dircount
    endrec[_ECD_ENTRIES_TOTAL] = dircount2
    endrec[_ECD_SIZE] = dirsize
    endrec[_ECD_OFFSET] = diroffset
    return endrec
224
225
def _EndRecData(fpin):
    """Return data from the "End of Central Directory" record, or None.

    The data is a list of the nine items in the ZIP "End of central dir"
    record followed by a tenth item, the file seek offset of this record.

    None is returned when no record is found or when the bytes following
    the signature are too short to hold a complete record.
    """

    # Determine file size
    fpin.seek(0, 2)
    filesize = fpin.tell()

    # Check to see if this is ZIP file with no archive comment (the
    # "end of central directory" structure should be the last item in the
    # file if this is the case).
    try:
        fpin.seek(-sizeEndCentDir, 2)
    except IOError:
        return None
    data = fpin.read()
    # The length check matters for file objects that clamp a negative seek
    # instead of raising: a short read must not reach struct.unpack().
    if (len(data) == sizeEndCentDir and
            data[0:4] == stringEndArchive and
            data[-2:] == b"\000\000"):
        # the signature is correct and there's no comment, unpack structure
        endrec = list(struct.unpack(structEndArchive, data))

        # Append a blank comment and record start offset
        endrec.append(b"")
        endrec.append(filesize - sizeEndCentDir)

        # Try to read the "Zip64 end of central directory" structure
        return _EndRecData64(fpin, -sizeEndCentDir, endrec)

    # Either this is not a ZIP file, or it is a ZIP file with an archive
    # comment.  Search the end of the file for the "end of central directory"
    # record signature. The comment is the last item in the ZIP file and may be
    # up to 64K long.  It is assumed that the "end of central directory" magic
    # number does not appear in the comment.
    maxCommentStart = max(filesize - (1 << 16) - sizeEndCentDir, 0)
    fpin.seek(maxCommentStart, 0)
    data = fpin.read()
    start = data.rfind(stringEndArchive)
    if start >= 0:
        # found the magic number; attempt to unpack and interpret
        recData = data[start:start+sizeEndCentDir]
        if len(recData) != sizeEndCentDir:
            # Signature sits too close to EOF for a full record: corrupt.
            return None
        endrec = list(struct.unpack(structEndArchive, recData))
        commentSize = endrec[_ECD_COMMENT_SIZE]  # as claimed by the zip file
        comment = data[start+sizeEndCentDir:start+sizeEndCentDir+commentSize]
        endrec.append(comment)
        endrec.append(maxCommentStart + start)

        # Try to read the "Zip64 end of central directory" structure
        return _EndRecData64(fpin, maxCommentStart + start - filesize,
                             endrec)

    # Unable to find a valid end of central directory structure
    return None
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000280
Fred Drake484d7352000-10-02 21:14:52 +0000281
class ZipInfo (object):
    """Class with attributes describing each file in the ZIP archive."""

    __slots__ = (
        'orig_filename',
        'filename',
        'date_time',
        'compress_type',
        'comment',
        'extra',
        'create_system',
        'create_version',
        'extract_version',
        'reserved',
        'flag_bits',
        'volume',
        'internal_attr',
        'external_attr',
        'header_offset',
        'CRC',
        'compress_size',
        'file_size',
        '_raw_time',
    )

    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
        """Create the record for one archive member.

        filename  -- member name; truncated at the first null character and
                     normalized to use "/" as the directory separator.
        date_time -- (year, month, day, hour, min, sec) tuple.

        Raises ValueError if the year is earlier than 1980.
        """
        self.orig_filename = filename   # Original file name in archive

        # Terminate the file name at the first null byte.  Null bytes in file
        # names are used as tricks by viruses in archives.
        null_byte = filename.find(chr(0))
        if null_byte >= 0:
            filename = filename[0:null_byte]
        # This is used to ensure paths in generated ZIP files always use
        # forward slashes as the directory separator, as required by the
        # ZIP format specification.
        if os.sep != "/" and os.sep in filename:
            filename = filename.replace(os.sep, "/")

        self.filename = filename        # Normalized file name
        self.date_time = date_time      # year, month, day, hour, min, sec

        if date_time[0] < 1980:
            raise ValueError('ZIP does not support timestamps before 1980')

        # Standard values:
        self.compress_type = ZIP_STORED  # Type of compression for the file
        self.comment = b""              # Comment for each file
        self.extra = b""                # ZIP extra data
        if sys.platform == 'win32':
            self.create_system = 0      # System which created ZIP archive
        else:
            # Assume everything else is unix-y
            self.create_system = 3      # System which created ZIP archive
        self.create_version = DEFAULT_VERSION   # Version which created ZIP archive
        self.extract_version = DEFAULT_VERSION  # Version needed to extract archive
        self.reserved = 0               # Must be zero
        self.flag_bits = 0              # ZIP flag bits
        self.volume = 0                 # Volume number of file header
        self.internal_attr = 0          # Internal attributes
        self.external_attr = 0          # External file attributes
        # Other attributes are set by class ZipFile:
        # header_offset         Byte offset to the file header
        # CRC                   CRC-32 of the uncompressed file
        # compress_size         Size of the compressed file
        # file_size             Size of the uncompressed file

    def FileHeader(self):
        """Return the per-file (local) header as bytes.

        The result is the packed fixed-size header followed by the encoded
        file name and the extra field.  As a side effect, extract_version
        and create_version are raised to the minimum version required by
        the compression method and by ZIP64, when one applies.
        """
        dt = self.date_time
        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
        if self.flag_bits & 0x08:
            # Set these to zero because we write them after the file data
            CRC = compress_size = file_size = 0
        else:
            CRC = self.CRC
            compress_size = self.compress_size
            file_size = self.file_size

        extra = self.extra

        min_version = 0
        if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
            # File is larger than what fits into a 4 byte integer,
            # fall back to the ZIP64 extension
            fmt = '<HHQQ'
            extra = extra + struct.pack(fmt,
                    1, struct.calcsize(fmt)-4, file_size, compress_size)
            # 0xffffffff in the fixed fields tells readers to look in the
            # ZIP64 extra record instead (see _decodeExtra).
            file_size = 0xffffffff
            compress_size = 0xffffffff
            min_version = ZIP64_VERSION

        if self.compress_type == ZIP_BZIP2:
            min_version = max(BZIP2_VERSION, min_version)
        elif self.compress_type == ZIP_LZMA:
            min_version = max(LZMA_VERSION, min_version)

        self.extract_version = max(min_version, self.extract_version)
        self.create_version = max(min_version, self.create_version)
        filename, flag_bits = self._encodeFilenameFlags()
        header = struct.pack(structFileHeader, stringFileHeader,
                 self.extract_version, self.reserved, flag_bits,
                 self.compress_type, dostime, dosdate, CRC,
                 compress_size, file_size,
                 len(filename), len(extra))
        return header + filename + extra

    def _encodeFilenameFlags(self):
        """Return (encoded file name, flag bits).

        The name is encoded as ASCII when possible; otherwise it is encoded
        as UTF-8 and bit 0x800 is set in the returned flags.
        """
        try:
            return self.filename.encode('ascii'), self.flag_bits
        except UnicodeEncodeError:
            return self.filename.encode('utf-8'), self.flag_bits | 0x800

    def _decodeExtra(self):
        """Decode the extra field and apply any ZIP64 record it contains.

        Each record is a (type, length) header of two little-endian 16-bit
        values followed by `length` payload bytes.  For type 1 (ZIP64) the
        64-bit values replace file_size, compress_size and header_offset,
        in that order, for each of those attributes currently holding the
        0xffffffff placeholder.

        Raises RuntimeError for a ZIP64 record with an unexpected length.
        """
        extra = self.extra
        unpack = struct.unpack
        # A record header needs 4 bytes; stop on shorter trailing padding
        # instead of letting unpack() fail on it.
        while len(extra) >= 4:
            tp, ln = unpack('<HH', extra[:4])
            if tp == 1:
                if ln >= 24:
                    counts = unpack('<QQQ', extra[4:28])
                elif ln == 16:
                    counts = unpack('<QQ', extra[4:20])
                elif ln == 8:
                    counts = unpack('<Q', extra[4:12])
                elif ln == 0:
                    counts = ()
                else:
                    raise RuntimeError("Corrupt extra field %s"%(ln,))

                idx = 0

                # ZIP64 extension (large files and/or large archives)
                if self.file_size in (0xffffffffffffffff, 0xffffffff):
                    self.file_size = counts[idx]
                    idx += 1

                if self.compress_size == 0xFFFFFFFF:
                    self.compress_size = counts[idx]
                    idx += 1

                if self.header_offset == 0xffffffff:
                    self.header_offset = counts[idx]
                    idx += 1

            extra = extra[ln+4:]
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000431
432
class _ZipDecrypter:
    """Decrypt data protected with the password-based ZIP encryption.

    Known plaintext attacks exist against this scheme, but it is still
    useful to be able to get data out of such a file.

    Usage:
        zd = _ZipDecrypter(mypwd)
        plain_char = zd(cypher_char)
        plain_text = map(zd, cypher_text)
    """

    def _GenerateCRCTable():
        """Build the 256-entry CRC-32 lookup table.

        ZIP encryption uses the CRC32 one-byte primitive for scrambling some
        internal keys.  A direct implementation was found to be faster than
        relying on binascii.crc32().
        """
        polynomial = 0xedb88320
        entries = []
        for value in range(256):
            for _ in range(8):
                low_bit = value & 1
                value = (value >> 1) & 0x7FFFFFFF
                if low_bit:
                    value ^= polynomial
            entries.append(value)
        return entries
    crctable = _GenerateCRCTable()

    def _crc32(self, ch, crc):
        """Compute the CRC32 primitive on one byte."""
        index = (crc ^ ch) & 0xff
        shifted = (crc >> 8) & 0xffffff
        return shifted ^ self.crctable[index]

    def __init__(self, pwd):
        """Seed the three keys and mix in every byte of *pwd*."""
        self.key0, self.key1, self.key2 = 0x12345678, 0x23456789, 0x34567890
        for byte in pwd:
            self._UpdateKeys(byte)

    def _UpdateKeys(self, c):
        """Advance the three keys with the plaintext byte *c*."""
        mask32 = 0xFFFFFFFF
        self.key0 = self._crc32(c, self.key0)
        summed = (self.key1 + (self.key0 & 0xFF)) & mask32
        self.key1 = (summed * 0x08088405 + 1) & mask32
        self.key2 = self._crc32((self.key1 >> 24) & 0xFF, self.key2)

    def __call__(self, c):
        """Decrypt a single character."""
        assert isinstance(c, int)
        mixed = self.key2 | 2
        keystream = ((mixed * (mixed ^ 1)) >> 8) & 0xFF
        plain = c ^ keystream
        self._UpdateKeys(plain)
        return plain
490
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200491
class LZMACompressor:
    """Raw LZMA1 compressor that prefixes its output with a small header.

    The header is emitted once, before the first compressed bytes: two bytes
    (9, 4), the 16-bit little-endian length of the filter properties, then
    the properties themselves.
    NOTE(review): 9 and 4 are presumably the LZMA version numbers required
    by the ZIP format -- confirm against the specification.
    """

    def __init__(self):
        # The underlying compressor is created lazily by _init().
        self._comp = None

    def _init(self):
        """Create the underlying compressor and return the stream header."""
        props = lzma._encode_filter_properties({'id': lzma.FILTER_LZMA1})
        filter_spec = lzma._decode_filter_properties(lzma.FILTER_LZMA1, props)
        self._comp = lzma.LZMACompressor(lzma.FORMAT_RAW,
                                         filters=[filter_spec])
        header = struct.pack('<BBH', 9, 4, len(props))
        return header + props

    def compress(self, data):
        """Compress *data*, emitting the header first if not yet written."""
        prefix = self._init() if self._comp is None else b''
        return prefix + self._comp.compress(data)

    def flush(self):
        """Finish the stream, emitting the header first if not yet written."""
        prefix = self._init() if self._comp is None else b''
        return prefix + self._comp.flush()
513
514
class LZMADecompressor:
    """Decompressor for raw LZMA1 data preceded by a properties header.

    Input is buffered until the 4-byte header, the filter properties whose
    length it announces, and at least one further byte have arrived; only
    then is the underlying raw decompressor created.
    """

    def __init__(self):
        self._decomp = None         # created once the header is complete
        self._unconsumed = b''      # header bytes collected so far
        self.eof = False

    def decompress(self, data):
        """Return the bytes decompressed from *data* (b'' while buffering)."""
        if self._decomp is None:
            self._unconsumed += data
            pending = self._unconsumed
            if len(pending) <= 4:
                return b''
            (props_len,) = struct.unpack('<H', pending[2:4])
            body_start = 4 + props_len
            if len(pending) <= body_start:
                return b''

            filter_spec = lzma._decode_filter_properties(
                lzma.FILTER_LZMA1, pending[4:body_start])
            self._decomp = lzma.LZMADecompressor(lzma.FORMAT_RAW,
                                                 filters=[filter_spec])
            data = pending[body_start:]
            del self._unconsumed

        output = self._decomp.decompress(data)
        self.eof = self._decomp.eof
        return output
541
542
# Human-readable names of ZIP compression method numbers; used to build
# the error message for methods that cannot be decompressed.
compressor_names = dict((
    (0, 'store'),
    (1, 'shrink'),
    (2, 'reduce'),
    (3, 'reduce'),
    (4, 'reduce'),
    (5, 'reduce'),
    (6, 'implode'),
    (7, 'tokenize'),
    (8, 'deflate'),
    (9, 'deflate64'),
    (10, 'implode'),
    (12, 'bzip2'),
    (14, 'lzma'),
    (18, 'terse'),
    (19, 'lz77'),
    (97, 'wavpack'),
    (98, 'ppmd'),
))
562
def _check_compression(compression):
    """Validate that *compression* can be used in this interpreter.

    Raises RuntimeError when the method is unknown, or when the module that
    implements it (zlib, bz2 or lzma) could not be imported.
    """
    if compression == ZIP_STORED:
        return

    if compression == ZIP_DEFLATED:
        backend = zlib
        message = "Compression requires the (missing) zlib module"
    elif compression == ZIP_BZIP2:
        backend = bz2
        message = "Compression requires the (missing) bz2 module"
    elif compression == ZIP_LZMA:
        backend = lzma
        message = "Compression requires the (missing) lzma module"
    else:
        raise RuntimeError("That compression method is not supported")

    # The module-level names are None when the import failed.
    if not backend:
        raise RuntimeError(message)
580
581
def _get_compressor(compress_type):
    """Return a new compressor object for *compress_type*.

    None is returned for any method other than deflate, bzip2 and LZMA.
    """
    if compress_type == ZIP_DEFLATED:
        return zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                                zlib.DEFLATED, -15)
    if compress_type == ZIP_BZIP2:
        return bz2.BZ2Compressor()
    if compress_type == ZIP_LZMA:
        return LZMACompressor()
    return None
592
593
def _get_decompressor(compress_type):
    """Return a new decompressor object for *compress_type*.

    Stored members need no decompressor, so None is returned for them.
    Raises NotImplementedError for any other unsupported method, naming it
    when compressor_names knows the method number.
    """
    if compress_type == ZIP_STORED:
        return None
    if compress_type == ZIP_DEFLATED:
        return zlib.decompressobj(-15)
    if compress_type == ZIP_BZIP2:
        return bz2.BZ2Decompressor()
    if compress_type == ZIP_LZMA:
        return LZMADecompressor()

    descr = compressor_names.get(compress_type)
    if descr:
        message = "compression type %d (%s)" % (compress_type, descr)
    else:
        message = "compression type %d" % (compress_type,)
    raise NotImplementedError(message)
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200609
610
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000611class ZipExtFile(io.BufferedIOBase):
Guido van Rossumd8faa362007-04-27 19:54:29 +0000612 """File-like object for reading an archive member.
613 Is returned by ZipFile.open().
614 """
615
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000616 # Max size supported by decompressor.
617 MAX_N = 1 << 31 - 1
Guido van Rossumd8faa362007-04-27 19:54:29 +0000618
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000619 # Read from compressed files in 4k blocks.
620 MIN_READ_SIZE = 4096
Guido van Rossumd8faa362007-04-27 19:54:29 +0000621
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000622 # Search for universal newlines or line chunks.
623 PATTERN = re.compile(br'^(?P<chunk>[^\r\n]+)|(?P<newline>\n|\r\n?)')
624
Łukasz Langae94980a2010-11-22 23:31:26 +0000625 def __init__(self, fileobj, mode, zipinfo, decrypter=None,
626 close_fileobj=False):
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000627 self._fileobj = fileobj
628 self._decrypter = decrypter
Łukasz Langae94980a2010-11-22 23:31:26 +0000629 self._close_fileobj = close_fileobj
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000630
Ezio Melotti92b47432010-01-28 01:44:41 +0000631 self._compress_type = zipinfo.compress_type
Ezio Melotti92b47432010-01-28 01:44:41 +0000632 self._compress_left = zipinfo.compress_size
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200633 self._left = zipinfo.file_size
Ezio Melotti92b47432010-01-28 01:44:41 +0000634
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200635 self._decompressor = _get_decompressor(self._compress_type)
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000636
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200637 self._eof = False
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000638 self._readbuffer = b''
639 self._offset = 0
640
641 self._universal = 'U' in mode
642 self.newlines = None
643
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000644 # Adjust read size for encrypted files since the first 12 bytes
645 # are for the encryption/password information.
646 if self._decrypter is not None:
647 self._compress_left -= 12
648
649 self.mode = mode
Guido van Rossumd8faa362007-04-27 19:54:29 +0000650 self.name = zipinfo.filename
651
Antoine Pitrou7c8bcb62010-08-12 15:11:50 +0000652 if hasattr(zipinfo, 'CRC'):
653 self._expected_crc = zipinfo.CRC
654 self._running_crc = crc32(b'') & 0xffffffff
655 else:
656 self._expected_crc = None
657
def readline(self, limit=-1):
    """Read and return one line from the stream.

    A non-negative limit caps the number of bytes returned.
    """
    if not self._universal:
        if limit < 0:
            # Fast path: the newline is already in the read buffer.
            end = self._readbuffer.find(b'\n', self._offset) + 1
            if end > 0:
                start = self._offset
                self._offset = end
                return self._readbuffer[start:end]
        return io.BufferedIOBase.readline(self, limit)

    collected = b''
    while limit < 0 or len(collected) < limit:
        lookahead = self.peek(2)
        if lookahead == b'':
            break

        # PATTERN yields either a newline or a line chunk, never both.
        # Peeking two bytes keeps a '\r\n' pair from being split into
        # two separate newlines.
        found = self.PATTERN.search(lookahead)
        terminator = found.group('newline')
        if terminator is not None:
            # Remember every distinct newline style seen so far.
            if self.newlines is None:
                self.newlines = []
            if terminator not in self.newlines:
                self.newlines.append(terminator)
            self._offset += len(terminator)
            return collected + b'\n'

        piece = found.group('chunk')
        if limit >= 0:
            piece = piece[:limit - len(collected)]
        self._offset += len(piece)
        collected += piece

    return collected
707
def peek(self, n=1):
    """Return buffered bytes without advancing the read position."""
    available = len(self._readbuffer) - self._offset
    if n > available:
        fetched = self.read(n)
        taken = len(fetched)
        if taken <= self._offset:
            # read() was served from the buffer; step back over it.
            self._offset -= taken
        else:
            # Put the fetched bytes back in front of what is still buffered.
            self._readbuffer = fetched + self._readbuffer[self._offset:]
            self._offset = 0

    # Hand back at most 512 bytes to keep allocations small in tight loops.
    start = self._offset
    return self._readbuffer[start:start + 512]
720
def readable(self):
    """Report that this stream supports reading (always True)."""
    return True
723
def read(self, n=-1):
    """Read and return up to n bytes.

    If the argument is omitted, None, or negative, data is read and
    returned until EOF is reached.
    """
    if n is None or n < 0:
        # Collect the pieces and join once; repeated "buf += ..." copies
        # the accumulated data on every iteration.
        pieces = [self._readbuffer[self._offset:]]
        self._readbuffer = b''
        self._offset = 0
        while not self._eof:
            pieces.append(self._read1(self.MAX_N))
        return b''.join(pieces)

    n -= len(self._readbuffer) - self._offset
    if n < 0:
        # The buffer alone satisfies the request.  n is now negative, so
        # it trims the unrequested bytes off the end of the slice.
        buf = self._readbuffer[self._offset:n]
        self._offset += len(buf)
        return buf

    # Drain the buffer, then keep reading until n more bytes or EOF.
    pieces = [self._readbuffer[self._offset:]]
    self._readbuffer = b''
    self._offset = 0
    while n > 0 and not self._eof:
        data = self._read1(n)
        if n < len(data):
            # _read1 returned more than asked for: keep the surplus
            # buffered for the next call.
            self._readbuffer = data
            self._offset = n
            pieces.append(data[:n])
            break
        pieces.append(data)
        n -= len(data)
    return b''.join(pieces)
755
def _update_crc(self, newdata):
    """Fold newdata into the running CRC-32 and verify it at end of file."""
    expected = self._expected_crc
    if expected is None:
        # Without a reference value there is nothing to check.
        return
    running = crc32(newdata, self._running_crc) & 0xffffffff
    self._running_crc = running
    # The comparison only makes sense once the whole member has been read.
    if self._eof and running != expected:
        raise BadZipFile("Bad CRC-32 for file %r" % self.name)
Antoine Pitrou7c8bcb62010-08-12 15:11:50 +0000765
def read1(self, n):
    """Read up to n bytes using at most one read() system call."""
    if n is None or n < 0:
        # No size given: return the buffer plus one chunk from _read1().
        pending = self._readbuffer[self._offset:]
        self._readbuffer = b''
        self._offset = 0
        return pending + self._read1(self.MAX_N)

    buffered = len(self._readbuffer) - self._offset
    n -= buffered
    if n < 0:
        # The buffer covers the request; the negative n trims the tail.
        served = self._readbuffer[self._offset:n]
        self._offset += len(served)
        return served

    pending = self._readbuffer[self._offset:]
    self._readbuffer = b''
    self._offset = 0
    if n <= 0:
        return pending

    fresh = self._read1(n)
    if n < len(fresh):
        # Keep the surplus buffered for the next call.
        self._readbuffer = fresh
        self._offset = n
        fresh = fresh[:n]
    return pending + fresh
794
def _read1(self, n):
    # Read up to n compressed bytes with at most one read() system call,
    # decrypt and decompress them.
    #
    # Returns the resulting bytes (b'' once EOF has been reached or when
    # n <= 0).  Updates self._eof, self._left and the running CRC.
    if self._eof or n <= 0:
        return b''

    # Read from file.
    if self._compress_type == ZIP_DEFLATED:
        ## Handle unconsumed data.
        # Start from input the decompressor has not consumed yet and only
        # fetch the shortfall from the file.
        data = self._decompressor.unconsumed_tail
        if n > len(data):
            data += self._read2(n - len(data))
    else:
        data = self._read2(n)

    if self._compress_type == ZIP_STORED:
        # Stored members are done once all raw bytes have been read.
        self._eof = self._compress_left <= 0
    elif self._compress_type == ZIP_DEFLATED:
        n = max(n, self.MIN_READ_SIZE)
        data = self._decompressor.decompress(data, n)
        # EOF when the decompressor says so, or when no raw input is left
        # in the file and none is pending in the decompressor.
        self._eof = (self._decompressor.eof or
                     self._compress_left <= 0 and
                     not self._decompressor.unconsumed_tail)
        if self._eof:
            data += self._decompressor.flush()
    else:
        # Any other compression type: decompress whatever was read.
        data = self._decompressor.decompress(data)
        self._eof = self._decompressor.eof or self._compress_left <= 0

    # Never hand out more than the bytes still expected for this member.
    data = data[:self._left]
    self._left -= len(data)
    if self._left <= 0:
        self._eof = True
    self._update_crc(data)
    return data
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000830
def _read2(self, n):
    """Read raw (still compressed) bytes for this member and decrypt them.

    At least MIN_READ_SIZE bytes are requested, but never more than the
    number of compressed bytes still outstanding.  Returns b'' when no
    compressed bytes remain.

    Raises EOFError if the underlying file ends while compressed bytes
    are still expected (a truncated archive).
    """
    if self._compress_left <= 0:
        return b''

    n = max(n, self.MIN_READ_SIZE)
    n = min(n, self._compress_left)

    data = self._fileobj.read(n)
    self._compress_left -= len(data)
    if not data:
        # The file ended early.  Without this check _compress_left stays
        # positive, EOF is never flagged and read() would loop forever.
        raise EOFError("compressed file ended before the end of the "
                       "archive member was reached")

    if self._decrypter is not None:
        data = bytes(map(self._decrypter, data))
    return data
Guido van Rossumd8faa362007-04-27 19:54:29 +0000844
def close(self):
    """Close this stream.

    The underlying file object is closed as well when _close_fileobj is
    set.  The base-class close() runs even if that close raises.
    """
    try:
        if self._close_fileobj:
            self._fileobj.close()
    finally:
        super().close()
Guido van Rossumd8faa362007-04-27 19:54:29 +0000851
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000852
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000853class ZipFile:
Tim Petersa19a1682001-03-29 04:36:09 +0000854 """ Class with methods to open, read, write, close, list zip files.
855
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000856 z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=False)
Tim Petersa19a1682001-03-29 04:36:09 +0000857
Fred Drake3d9091e2001-03-26 15:49:24 +0000858 file: Either the path to the file, or a file-like object.
859 If it is a path, the file will be opened and closed by ZipFile.
860 mode: The mode can be either read "r", write "w" or append "a".
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +0200861 compression: ZIP_STORED (no compression), ZIP_DEFLATED (requires zlib),
862 ZIP_BZIP2 (requires bz2) or ZIP_LZMA (requires lzma).
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000863 allowZip64: if True ZipFile will create files with ZIP64 extensions when
864 needed, otherwise it will raise an exception when this would
865 be necessary.
866
Fred Drake3d9091e2001-03-26 15:49:24 +0000867 """
Fred Drake484d7352000-10-02 21:14:52 +0000868
Fred Drake90eac282001-02-28 05:29:34 +0000869 fp = None # Set here since __del__ checks it
870
def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=False):
    """Open the ZIP file with mode read "r", write "w" or append "a".

    file is either a path (str), in which case the file is opened here,
    or a file-like object that is used as given.  compression is checked
    with _check_compression() and becomes the default compression method.
    allowZip64 is stored for later size checks when writing.

    Raises RuntimeError for an unsupported mode.  If reading the archive
    directory fails, a file opened by this constructor is closed again
    before the exception propagates.
    """
    if mode not in ("r", "w", "a"):
        raise RuntimeError('ZipFile() requires mode "r", "w", or "a"')

    _check_compression(compression)

    self._allowZip64 = allowZip64
    self._didModify = False
    self.debug = 0  # Level of printing: 0 through 3
    self.NameToInfo = {}    # Find file info given name
    self.filelist = []      # List of ZipInfo instances for archive
    self.compression = compression  # Method of compression
    self.mode = key = mode.replace('b', '')[0]
    self.pwd = None
    self._comment = b''

    # Check if we were passed a file-like object
    if isinstance(file, str):
        # No, it's a filename
        self._filePassed = 0
        self.filename = file
        modeDict = {'r' : 'rb', 'w': 'wb', 'a' : 'r+b'}
        try:
            self.fp = io.open(file, modeDict[mode])
        except IOError:
            if mode == 'a':
                # Nothing to append to yet: create the file instead.
                mode = key = 'w'
                self.fp = io.open(file, modeDict[mode])
            else:
                raise
    else:
        self._filePassed = 1
        self.fp = file
        self.filename = getattr(file, 'name', None)

    try:
        if key == 'r':
            self._GetContents()
        elif key == 'w':
            # set the modified flag so central directory gets written
            # even if no files are added to the archive
            self._didModify = True
        elif key == 'a':
            try:
                # See if file is a zip file
                self._RealGetContents()
                # seek to start of directory and overwrite
                self.fp.seek(self.start_dir, 0)
            except BadZipFile:
                # file is not a zip file, just append
                self.fp.seek(0, 2)

            # set the modified flag so central directory gets written
            # even if no files are added to the archive
            self._didModify = True
        else:
            raise RuntimeError('Mode must be "r", "w" or "a"')
    except BaseException:
        # Do not leak a file we opened ourselves, whatever went wrong
        # (unsupported version, decode error, ...).  self.fp may already
        # be None if _GetContents() cleaned up.
        if not self._filePassed and self.fp is not None:
            self.fp.close()
            self.fp = None
        raise
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000931
def __enter__(self):
    """Enter a with block; the archive object itself is the target."""
    return self
934
def __exit__(self, type, value, traceback):
    """Leave a with block by calling close(); exceptions are not suppressed."""
    self.close()
937
def _GetContents(self):
    """Read the directory, making sure we close the file if reading fails.

    Any exception from _RealGetContents() is re-raised unchanged.  When
    the file was opened by this object (not passed in by the caller) it
    is closed first and self.fp is reset to None.
    """
    try:
        self._RealGetContents()
    except BaseException:
        # Clean up on every failure, not only BadZipFile: other errors
        # such as NotImplementedError used to leave the file open.
        if not self._filePassed:
            self.fp.close()
            self.fp = None
        raise
948
def _RealGetContents(self):
    """Read in the table of contents for the ZIP file.

    Fills self.filelist and self.NameToInfo with one ZipInfo per central
    directory record, and sets self._comment and self.start_dir.

    Raises BadZipFile when the end-of-archive record cannot be found or a
    central directory record is malformed or truncated, and
    NotImplementedError when a member needs a newer extract version than
    MAX_EXTRACT_VERSION.
    """
    fp = self.fp
    try:
        endrec = _EndRecData(fp)
    except IOError:
        raise BadZipFile("File is not a zip file")
    if not endrec:
        raise BadZipFile("File is not a zip file")
    if self.debug > 1:
        print(endrec)
    size_cd = endrec[_ECD_SIZE]             # bytes in central directory
    offset_cd = endrec[_ECD_OFFSET]         # offset of central directory
    self._comment = endrec[_ECD_COMMENT]    # archive comment

    # "concat" is zero, unless zip was concatenated to another file
    concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
    if endrec[_ECD_SIGNATURE] == stringEndArchive64:
        # If Zip64 extension structures are present, account for them
        concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)

    if self.debug > 2:
        inferred = concat + offset_cd
        print("given, inferred, offset", offset_cd, inferred, concat)
    # self.start_dir:  Position of start of central directory
    self.start_dir = offset_cd + concat
    fp.seek(self.start_dir, 0)
    data = fp.read(size_cd)
    # Parse the directory from memory from here on.
    fp = io.BytesIO(data)
    total = 0
    while total < size_cd:
        centdir = fp.read(sizeCentralDir)
        if centdir[0:4] != stringCentralDir:
            raise BadZipFile("Bad magic number for central directory")
        if len(centdir) != sizeCentralDir:
            # A short record with a valid signature would otherwise make
            # struct.unpack raise struct.error instead of BadZipFile.
            raise BadZipFile("Truncated central directory")
        centdir = struct.unpack(structCentralDir, centdir)
        if self.debug > 2:
            print(centdir)
        filename = fp.read(centdir[_CD_FILENAME_LENGTH])
        flags = centdir[5]
        if flags & 0x800:
            # UTF-8 file names extension
            filename = filename.decode('utf-8')
        else:
            # Historical ZIP filename encoding
            filename = filename.decode('cp437')
        # Create ZipInfo instance to store file information
        x = ZipInfo(filename)
        x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
        x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
        x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
        (x.create_version, x.create_system, x.extract_version, x.reserved,
         x.flag_bits, x.compress_type, t, d,
         x.CRC, x.compress_size, x.file_size) = centdir[1:12]
        if x.extract_version > MAX_EXTRACT_VERSION:
            raise NotImplementedError("zip file version %.1f" %
                                      (x.extract_version / 10))
        x.volume, x.internal_attr, x.external_attr = centdir[15:18]
        # Convert date/time code to (year, month, day, hour, min, sec)
        x._raw_time = t
        x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
                        t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )

        x._decodeExtra()
        # Shift by "concat" so the offset is relative to the real file.
        x.header_offset = x.header_offset + concat
        self.filelist.append(x)
        self.NameToInfo[x.filename] = x

        # update total bytes read from central directory
        total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH]
                 + centdir[_CD_EXTRA_FIELD_LENGTH]
                 + centdir[_CD_COMMENT_LENGTH])

        if self.debug > 2:
            print("total", total)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001023
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001024
def namelist(self):
    """Return the names of all archive members, in directory order."""
    names = []
    for entry in self.filelist:
        names.append(entry.filename)
    return names
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001028
def infolist(self):
    """Return a list of class ZipInfo instances for files in the
    archive.

    This is the internal self.filelist itself, not a copy.
    """
    return self.filelist
1033
def printdir(self, file=None):
    """Print a table of contents for the archive to file.

    file is passed straight to print(), so None means standard output.
    """
    header = "%-46s %19s %12s" % ("File Name", "Modified ", "Size")
    print(header, file=file)
    for entry in self.filelist:
        stamp = "%d-%02d-%02d %02d:%02d:%02d" % entry.date_time[:6]
        row = "%-46s %s %12d" % (entry.filename, stamp, entry.file_size)
        print(row, file=file)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001042
1043 def testzip(self):
Fred Drake484d7352000-10-02 21:14:52 +00001044 """Read all the files and check the CRC."""
Benjamin Peterson4cd6a952008-08-17 20:23:46 +00001045 chunk_size = 2 ** 20
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001046 for zinfo in self.filelist:
1047 try:
Benjamin Peterson4cd6a952008-08-17 20:23:46 +00001048 # Read by chunks, to avoid an OverflowError or a
1049 # MemoryError with very large embedded files.
1050 f = self.open(zinfo.filename, "r")
1051 while f.read(chunk_size): # Check CRC-32
1052 pass
Georg Brandl4d540882010-10-28 06:42:33 +00001053 except BadZipFile:
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001054 return zinfo.filename
1055
def getinfo(self, name):
    """Look up and return the ZipInfo stored under 'name'.

    Raises KeyError when the archive has no such member.
    """
    found = self.NameToInfo.get(name)
    if found is not None:
        return found
    raise KeyError(
        'There is no item named %r in the archive' % name)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001064
def setpassword(self, pwd):
    """Set the default password used for encrypted members.

    A false value (None, b'') clears the password.  Raises TypeError
    for a true value that is not bytes.
    """
    if pwd and not isinstance(pwd, bytes):
        raise TypeError("pwd: expected bytes, got %s" % type(pwd))
    self.pwd = pwd if pwd else None
Thomas Wouterscf297e42007-02-23 15:07:44 +00001073
@property
def comment(self):
    """The comment text associated with the ZIP file."""
    return self._comment

@comment.setter
def comment(self, comment):
    """Set the archive comment and mark the archive as modified.

    Raises TypeError unless comment is bytes.  A comment longer than
    ZIP_MAX_COMMENT bytes is truncated to that length; a notice is
    printed when self.debug is set.
    """
    if not isinstance(comment, bytes):
        raise TypeError("comment: expected bytes, got %s" % type(comment))
    # check for valid comment length
    # A comment of exactly ZIP_MAX_COMMENT bytes fits, so only report
    # (and perform) truncation for strictly longer ones.
    if len(comment) > ZIP_MAX_COMMENT:
        if self.debug:
            print('Archive comment is too long; truncating to %d bytes'
                  % ZIP_MAX_COMMENT)
        comment = comment[:ZIP_MAX_COMMENT]
    self._comment = comment
    self._didModify = True
1091
def read(self, name, pwd=None):
    """Return the complete contents of the member 'name'.

    The member is opened with self.open(name, "r", pwd), read to the end
    and closed again; the result is whatever that stream's read() returns.
    """
    with self.open(name, "r", pwd) as fp:
        return fp.read()
Guido van Rossumd8faa362007-04-27 19:54:29 +00001096
def open(self, name, mode="r", pwd=None):
    """Return file-like object for 'name'.

    name is a member name or a ZipInfo instance.  mode must be "r", "U"
    or "rU".  pwd, if given, must be bytes and overrides the default
    password for encrypted members.

    Raises RuntimeError for a bad mode, a closed archive, a missing
    password or a wrong password; TypeError for a non-bytes pwd;
    KeyError for an unknown member name; BadZipFile for a damaged local
    file header; NotImplementedError for unsupported features.

    When the archive was opened from a path, a private file handle is
    opened for the member.  It is closed again if anything fails before
    the ZipExtFile is returned.
    """
    if mode not in ("r", "U", "rU"):
        raise RuntimeError('open() requires mode "r", "U", or "rU"')
    if pwd and not isinstance(pwd, bytes):
        raise TypeError("pwd: expected bytes, got %s" % type(pwd))
    if not self.fp:
        raise RuntimeError(
            "Attempt to read ZIP archive that was already closed")

    # Only open a new file for instances where we were not
    # given a file object in the constructor
    if self._filePassed:
        zef_file = self.fp
    else:
        zef_file = io.open(self.filename, 'rb')

    try:
        # Make sure we have an info object
        if isinstance(name, ZipInfo):
            # 'name' is already an info object
            zinfo = name
        else:
            # Get info object for name
            zinfo = self.getinfo(name)
        zef_file.seek(zinfo.header_offset, 0)

        # Skip the file header:
        fheader = zef_file.read(sizeFileHeader)
        if fheader[0:4] != stringFileHeader:
            raise BadZipFile("Bad magic number for file header")
        if len(fheader) != sizeFileHeader:
            # A short header would otherwise surface as struct.error.
            raise BadZipFile("Truncated file header")

        fheader = struct.unpack(structFileHeader, fheader)
        fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
        if fheader[_FH_EXTRA_FIELD_LENGTH]:
            zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])

        if zinfo.flag_bits & 0x20:
            # Zip 2.7: compressed patched data
            raise NotImplementedError("compressed patched data (flag bit 5)")

        if zinfo.flag_bits & 0x40:
            # strong encryption
            raise NotImplementedError("strong encryption (flag bit 6)")

        if zinfo.flag_bits & 0x800:
            # UTF-8 filename
            fname_str = fname.decode("utf-8")
        else:
            fname_str = fname.decode("cp437")

        if fname_str != zinfo.orig_filename:
            raise BadZipFile(
                'File name in directory %r and header %r differ.'
                % (zinfo.orig_filename, fname))

        # check for encrypted flag & handle password
        is_encrypted = zinfo.flag_bits & 0x1
        zd = None
        if is_encrypted:
            if not pwd:
                pwd = self.pwd
            if not pwd:
                raise RuntimeError("File %s is encrypted, "
                                   "password required for extraction" % name)

            zd = _ZipDecrypter(pwd)
            # The first 12 bytes in the cypher stream is an encryption header
            #  used to strengthen the algorithm. The first 11 bytes are
            #  completely random, while the 12th contains the MSB of the CRC,
            #  or the MSB of the file time depending on the header type
            #  and is used to check the correctness of the password.
            header = zef_file.read(12)
            h = list(map(zd, header[0:12]))
            if zinfo.flag_bits & 0x8:
                # compare against the file type from extended local headers
                check_byte = (zinfo._raw_time >> 8) & 0xff
            else:
                # compare against the CRC otherwise
                check_byte = (zinfo.CRC >> 24) & 0xff
            if h[11] != check_byte:
                raise RuntimeError("Bad password for file", name)

        return ZipExtFile(zef_file, mode, zinfo, zd,
                          close_fileobj=not self._filePassed)
    except BaseException:
        # One cleanup point for every failure path; several of them
        # (bad magic, unsupported flags, decode errors) used to leak the
        # private file handle.
        if not self._filePassed:
            zef_file.close()
        raise
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001192
def extract(self, member, path=None, pwd=None):
    """Extract one member and return the path it was written to.

    `member' may be a filename or a ZipInfo object.  The member is
    placed under `path', or under the current working directory when
    `path' is None, using its full name.  `pwd' is passed on for
    encrypted members.
    """
    zinfo = member if isinstance(member, ZipInfo) else self.getinfo(member)
    destination = os.getcwd() if path is None else path
    return self._extract_member(zinfo, destination, pwd)
1206
def extractall(self, path=None, members=None, pwd=None):
    """Extract several members by calling extract() on each in turn.

    `members' defaults to everything returned by namelist().  `path'
    and `pwd' are handed to extract() unchanged.
    """
    selected = self.namelist() if members is None else members
    for entry in selected:
        self.extract(entry, path, pwd)
1218
def _extract_member(self, member, targetpath, pwd):
    """Extract the ZipInfo object 'member' to a physical
       file on the path targetpath.

    Returns the path that was created.  Member names come from the
    archive and are untrusted, so any drive, leading separators and
    '.'/'..' components are dropped before joining with targetpath;
    the member can therefore not be written outside targetpath.
    """
    # Strip trailing path separator, unless it represents the root.
    if (targetpath[-1:] in (os.path.sep, os.path.altsep)
            and len(os.path.splitdrive(targetpath)[1]) > 1):
        targetpath = targetpath[:-1]

    # Build the destination pathname, replacing forward slashes with
    # platform specific separators.
    arcname = member.filename.replace('/', os.path.sep)
    if os.path.altsep:
        arcname = arcname.replace(os.path.altsep, os.path.sep)
    # Drop any drive, then every empty, '.' and '..' component.  This
    # also removes leading separators.
    arcname = os.path.splitdrive(arcname)[1]
    arcname = os.path.sep.join(
        part for part in arcname.split(os.path.sep)
        if part not in ('', os.path.curdir, os.path.pardir))

    targetpath = os.path.normpath(os.path.join(targetpath, arcname))

    # Create all upper directories if necessary.
    upperdirs = os.path.dirname(targetpath)
    if upperdirs and not os.path.exists(upperdirs):
        os.makedirs(upperdirs)

    # A name ending in '/' denotes a directory entry.
    if member.filename.endswith('/'):
        if not os.path.isdir(targetpath):
            os.mkdir(targetpath)
        return targetpath

    # Both streams are closed even if the copy fails part-way.
    with self.open(member, pwd=pwd) as source, \
            open(targetpath, "wb") as target:
        shutil.copyfileobj(source, target)

    return targetpath
1255
def _writecheck(self, zinfo):
    """Validate that zinfo can be written to this archive.

    Raises RuntimeError when the archive is not open for writing,
    LargeZipFile when ZIP64 would be needed but is not allowed, and
    whatever _check_compression() raises for the compression type.
    A duplicate member name is only reported when self.debug is set.
    """
    if zinfo.filename in self.NameToInfo and self.debug:
        # Warning for duplicate names
        print("Duplicate name:", zinfo.filename)
    if self.mode not in ("w", "a"):
        raise RuntimeError('write() requires mode "w" or "a"')
    if not self.fp:
        raise RuntimeError(
            "Attempt to write ZIP archive that was already closed")
    _check_compression(zinfo.compress_type)
    zip64_allowed = self._allowZip64
    if zinfo.file_size > ZIP64_LIMIT and not zip64_allowed:
        raise LargeZipFile("Filesize would require ZIP64 extensions")
    if zinfo.header_offset > ZIP64_LIMIT and not zip64_allowed:
        raise LargeZipFile(
            "Zipfile size would require ZIP64 extensions")
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001274
def write(self, filename, arcname=None, compress_type=None):
    """Put the bytes from filename into the archive under the name
    arcname.

    arcname defaults to filename; any drive and leading separators are
    removed from it.  compress_type defaults to self.compression.
    Directories are stored as an entry whose name ends in '/' and that
    has no data.

    Raises RuntimeError if the archive is already closed, plus whatever
    _writecheck() raises for the new entry.
    """
    if not self.fp:
        raise RuntimeError(
            "Attempt to write to ZIP archive that was already closed")

    st = os.stat(filename)
    isdir = stat.S_ISDIR(st.st_mode)
    mtime = time.localtime(st.st_mtime)
    date_time = mtime[0:6]
    # Create ZipInfo instance to store file information
    if arcname is None:
        arcname = filename
    arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
    # Archive names are stored without a leading separator.
    while arcname[0] in (os.sep, os.altsep):
        arcname = arcname[1:]
    if isdir:
        arcname += '/'
    zinfo = ZipInfo(arcname, date_time)
    zinfo.external_attr = (st[0] & 0xFFFF) << 16      # Unix attributes
    if compress_type is None:
        zinfo.compress_type = self.compression
    else:
        zinfo.compress_type = compress_type

    zinfo.file_size = st.st_size
    zinfo.flag_bits = 0x00
    zinfo.header_offset = self.fp.tell()    # Start of header bytes
    if zinfo.compress_type == ZIP_LZMA:
        # Compressed data includes an end-of-stream (EOS) marker
        zinfo.flag_bits |= 0x02

    # Validate against the stat() size and current offset before any
    # bytes are written.
    self._writecheck(zinfo)
    self._didModify = True

    if isdir:
        # Directory entry: header only, no data.
        zinfo.file_size = 0
        zinfo.compress_size = 0
        zinfo.CRC = 0
        self.filelist.append(zinfo)
        self.NameToInfo[zinfo.filename] = zinfo
        self.fp.write(zinfo.FileHeader())
        return

    cmpr = _get_compressor(zinfo.compress_type)
    with open(filename, "rb") as fp:
        # Must overwrite CRC and sizes with correct data later
        zinfo.CRC = CRC = 0
        zinfo.compress_size = compress_size = 0
        zinfo.file_size = file_size = 0
        self.fp.write(zinfo.FileHeader())
        # Stream the file in 8 KiB chunks, tracking size and CRC of the
        # uncompressed data as we go.
        while 1:
            buf = fp.read(1024 * 8)
            if not buf:
                break
            file_size = file_size + len(buf)
            CRC = crc32(buf, CRC) & 0xffffffff
            if cmpr:
                buf = cmpr.compress(buf)
                compress_size = compress_size + len(buf)
            self.fp.write(buf)
    if cmpr:
        # Emit whatever the compressor is still holding back.
        buf = cmpr.flush()
        compress_size = compress_size + len(buf)
        self.fp.write(buf)
        zinfo.compress_size = compress_size
    else:
        zinfo.compress_size = file_size
    zinfo.CRC = CRC
    zinfo.file_size = file_size
    # Seek backwards and write CRC and file sizes
    position = self.fp.tell()       # Preserve current position in file
    # The three 32-bit fields written below start 14 bytes into the
    # header that was written above.
    self.fp.seek(zinfo.header_offset + 14, 0)
    self.fp.write(struct.pack("<LLL", zinfo.CRC, zinfo.compress_size,
                              zinfo.file_size))
    self.fp.seek(position, 0)
    self.filelist.append(zinfo)
    self.NameToInfo[zinfo.filename] = zinfo
1354
def writestr(self, zinfo_or_arcname, data, compress_type=None):
    """Store 'data' in the archive as a single member.

    'zinfo_or_arcname' is either a ZipInfo instance describing the
    member, or the name the member should get inside the archive.
    'data' may be a 'bytes' or a 'str' instance; a 'str' is encoded
    as UTF-8 before it is stored.  When 'compress_type' is not None it
    replaces the compression method recorded on the entry.

    Raises RuntimeError if the archive has already been closed.
    """
    payload = data.encode("utf-8") if isinstance(data, str) else data

    if isinstance(zinfo_or_arcname, ZipInfo):
        zinfo = zinfo_or_arcname
    else:
        # Only a name was supplied: build a fresh entry stamped with the
        # current local time, the archive-wide compression method and
        # mode 0o600 stored in the upper 16 bits of external_attr.
        zinfo = ZipInfo(filename=zinfo_or_arcname,
                        date_time=time.localtime(time.time())[:6])
        zinfo.compress_type = self.compression
        zinfo.external_attr = 0o600 << 16

    if not self.fp:
        raise RuntimeError(
            "Attempt to write to ZIP archive that was already closed")

    # Sizes and offset are filled in before _writecheck() sees the entry.
    zinfo.file_size = len(payload)              # Uncompressed size
    zinfo.header_offset = self.fp.tell()        # Start of header data
    if compress_type is not None:
        zinfo.compress_type = compress_type
    if zinfo.compress_type == ZIP_LZMA:
        # Compressed data includes an end-of-stream (EOS) marker
        zinfo.flag_bits |= 0x02

    self._writecheck(zinfo)
    self._didModify = True

    # The checksum always covers the uncompressed bytes.
    zinfo.CRC = crc32(payload) & 0xffffffff
    compressor = _get_compressor(zinfo.compress_type)
    if not compressor:
        zinfo.compress_size = zinfo.file_size
    else:
        payload = compressor.compress(payload) + compressor.flush()
        zinfo.compress_size = len(payload)      # Compressed size

    zinfo.header_offset = self.fp.tell()        # Start of header data
    self.fp.write(zinfo.FileHeader())
    self.fp.write(payload)
    self.fp.flush()

    if zinfo.flag_bits & 0x08:
        # Flag bit 3 set: CRC and sizes also follow the file data.
        descriptor = struct.pack("<LLL", zinfo.CRC, zinfo.compress_size,
                                 zinfo.file_size)
        self.fp.write(descriptor)

    self.filelist.append(zinfo)
    self.NameToInfo[zinfo.filename] = zinfo
1402
def __del__(self):
    """Call the "close()" method in case the user forgot."""
    # close() returns immediately when self.fp is already None, so
    # finalizing an archive that was closed explicitly does nothing.
    self.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001406
def close(self):
    """Close the file, and for mode "w" and "a" write the ending
    records.

    When the archive was opened in mode "w" or "a" and has been
    modified, the central directory, the ZIP64 end-of-archive records
    (only when a count, size or offset exceeds the classic limits) and
    the end-of-archive record followed by the archive comment are
    written first.

    The underlying file object is always released, even when writing
    the ending records fails: self.fp is reset to None and, unless the
    file object was passed in by the caller (self._filePassed), it is
    closed.  Any exception raised while writing is propagated.
    Calling close() on an already closed archive does nothing.
    """
    if self.fp is None:
        return

    try:
        if self.mode in ("w", "a") and self._didModify:  # write ending records
            pos1 = self.fp.tell()
            for zinfo in self.filelist:         # write central directory
                dt = zinfo.date_time
                dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
                dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)

                # Values too large for the 32-bit fields are moved into
                # 'extra' and replaced by the 0xffffffff placeholder.
                extra = []
                if (zinfo.file_size > ZIP64_LIMIT
                        or zinfo.compress_size > ZIP64_LIMIT):
                    extra.append(zinfo.file_size)
                    extra.append(zinfo.compress_size)
                    file_size = 0xffffffff
                    compress_size = 0xffffffff
                else:
                    file_size = zinfo.file_size
                    compress_size = zinfo.compress_size

                if zinfo.header_offset > ZIP64_LIMIT:
                    extra.append(zinfo.header_offset)
                    header_offset = 0xffffffff
                else:
                    header_offset = zinfo.header_offset

                extra_data = zinfo.extra
                min_version = 0
                if extra:
                    # Append a ZIP64 field to the extra's
                    extra_data = struct.pack(
                        '<HH' + 'Q'*len(extra),
                        1, 8*len(extra), *extra) + extra_data

                    min_version = ZIP64_VERSION

                if zinfo.compress_type == ZIP_BZIP2:
                    min_version = max(BZIP2_VERSION, min_version)
                elif zinfo.compress_type == ZIP_LZMA:
                    min_version = max(LZMA_VERSION, min_version)

                extract_version = max(min_version, zinfo.extract_version)
                create_version = max(min_version, zinfo.create_version)
                try:
                    filename, flag_bits = zinfo._encodeFilenameFlags()
                    centdir = struct.pack(structCentralDir,
                        stringCentralDir, create_version,
                        zinfo.create_system, extract_version, zinfo.reserved,
                        flag_bits, zinfo.compress_type, dostime, dosdate,
                        zinfo.CRC, compress_size, file_size,
                        len(filename), len(extra_data), len(zinfo.comment),
                        0, zinfo.internal_attr, zinfo.external_attr,
                        header_offset)
                except DeprecationWarning:
                    # Diagnostic aid: dump the values that could not be
                    # packed before re-raising.
                    print((structCentralDir, stringCentralDir, create_version,
                        zinfo.create_system, extract_version, zinfo.reserved,
                        zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
                        zinfo.CRC, compress_size, file_size,
                        len(zinfo.filename), len(extra_data), len(zinfo.comment),
                        0, zinfo.internal_attr, zinfo.external_attr,
                        header_offset), file=sys.stderr)
                    raise
                self.fp.write(centdir)
                self.fp.write(filename)
                self.fp.write(extra_data)
                self.fp.write(zinfo.comment)

            pos2 = self.fp.tell()
            # Write end-of-zip-archive record
            centDirCount = len(self.filelist)
            centDirSize = pos2 - pos1
            centDirOffset = pos1
            if (centDirCount >= ZIP_FILECOUNT_LIMIT or
                    centDirOffset > ZIP64_LIMIT or
                    centDirSize > ZIP64_LIMIT):
                # Need to write the ZIP64 end-of-archive records
                # NOTE(review): 44, 45, 45 are presumably the record size
                # and version fields of structEndArchive64 -- confirm
                # against the format definition.
                zip64endrec = struct.pack(
                    structEndArchive64, stringEndArchive64,
                    44, 45, 45, 0, 0, centDirCount, centDirCount,
                    centDirSize, centDirOffset)
                self.fp.write(zip64endrec)

                zip64locrec = struct.pack(
                    structEndArchive64Locator,
                    stringEndArchive64Locator, 0, pos2, 1)
                self.fp.write(zip64locrec)
                # Clamp to what the classic end record can hold.
                centDirCount = min(centDirCount, 0xFFFF)
                centDirSize = min(centDirSize, 0xFFFFFFFF)
                centDirOffset = min(centDirOffset, 0xFFFFFFFF)

            endrec = struct.pack(structEndArchive, stringEndArchive,
                                 0, 0, centDirCount, centDirCount,
                                 centDirSize, centDirOffset,
                                 len(self._comment))
            self.fp.write(endrec)
            self.fp.write(self._comment)
            self.fp.flush()
    finally:
        # Release the handle even if writing the ending records failed,
        # so a failed close() neither leaks the file nor is retried on a
        # half-written archive.
        fp = self.fp
        self.fp = None
        if not self._filePassed:
            fp.close()
1512
1513
class PyZipFile(ZipFile):
    """Class to create ZIP archives with Python library files and packages.

    The 'optimize' argument selects which compiled file is archived for
    each module: -1 (the default) uses whichever up-to-date compiled
    file already exists and compiles one only when none is found; 0
    archives the .pyc file; any other value archives the .pyo file,
    compiling with that optimization level when necessary.
    """

    def __init__(self, file, mode="r", compression=ZIP_STORED,
                 allowZip64=False, optimize=-1):
        ZipFile.__init__(self, file, mode=mode, compression=compression,
                         allowZip64=allowZip64)
        self._optimize = optimize

    def writepy(self, pathname, basename=""):
        """Add all files from "pathname" to the ZIP archive.

        If pathname is a package directory, search the directory and
        all package subdirectories recursively for all *.py and enter
        the modules into the archive.  If pathname is a plain
        directory, listdir *.py and enter all modules.  Else, pathname
        must be a Python *.py file and the module will be put into the
        archive.  Added modules are always module.pyo or module.pyc.
        This method will compile the module.py into module.pyc if
        necessary.

        'basename' is the archive directory under which the modules are
        stored.  Raises RuntimeError when pathname is a file whose name
        does not end with ".py".
        """
        name = os.path.split(pathname)[1]
        if os.path.isdir(pathname):
            initname = os.path.join(pathname, "__init__.py")
            if os.path.isfile(initname):
                # This is a package directory, add it
                if basename:
                    basename = "%s/%s" % (basename, name)
                else:
                    basename = name
                if self.debug:
                    print("Adding package in", pathname, "as", basename)
                fname, arcname = self._get_codename(initname[0:-3], basename)
                if self.debug:
                    print("Adding", arcname)
                self.write(fname, arcname)
                dirlist = os.listdir(pathname)
                # __init__.py was just added; do not add it twice.
                dirlist.remove("__init__.py")
                # Add all *.py files and package subdirectories
                for filename in dirlist:
                    path = os.path.join(pathname, filename)
                    ext = os.path.splitext(filename)[1]
                    if os.path.isdir(path):
                        if os.path.isfile(os.path.join(path, "__init__.py")):
                            # This is a package directory, add it
                            self.writepy(path, basename)  # Recursive call
                    elif ext == ".py":
                        fname, arcname = self._get_codename(path[0:-3],
                                                            basename)
                        if self.debug:
                            print("Adding", arcname)
                        self.write(fname, arcname)
            else:
                # This is NOT a package directory, add its files at top level
                if self.debug:
                    print("Adding files from directory", pathname)
                for filename in os.listdir(pathname):
                    path = os.path.join(pathname, filename)
                    ext = os.path.splitext(filename)[1]
                    if ext == ".py":
                        fname, arcname = self._get_codename(path[0:-3],
                                                            basename)
                        if self.debug:
                            print("Adding", arcname)
                        self.write(fname, arcname)
        else:
            if pathname[-3:] != ".py":
                raise RuntimeError(
                    'Files added with writepy() must end with ".py"')
            fname, arcname = self._get_codename(pathname[0:-3], basename)
            if self.debug:
                print("Adding file", arcname)
            self.write(fname, arcname)

    def _get_codename(self, pathname, basename):
        """Return (filename, archivename) for the path.

        Given a module name path, return the correct file path and
        archive name, compiling if necessary.  For example, given
        /python/lib/string, return (/python/lib/string.pyc, string).

        If compilation fails, the error message is printed and the .py
        source file itself is returned instead of a compiled file.
        """
        def _compile(file, optimize=-1):
            """Compile 'file'; return True on success, False on failure."""
            import py_compile
            if self.debug:
                print("Compiling", file)
            try:
                py_compile.compile(file, doraise=True, optimize=optimize)
            except py_compile.PyCompileError as err:
                # Report the problem and let the caller fall back to
                # archiving the source file.
                print(err.msg)
                return False
            return True

        def _up_to_date(candidate):
            """True if 'candidate' exists and is not older than the source."""
            # The source file is only stat()ed when the candidate exists.
            return (os.path.isfile(candidate) and
                    os.stat(candidate).st_mtime >= os.stat(file_py).st_mtime)

        file_py = pathname + ".py"
        file_pyc = pathname + ".pyc"
        file_pyo = pathname + ".pyo"
        pycache_pyc = imp.cache_from_source(file_py, True)
        pycache_pyo = imp.cache_from_source(file_py, False)
        if self._optimize == -1:
            # legacy mode: use whatever file is present
            if _up_to_date(file_pyo):
                # Use .pyo file.
                arcname = fname = file_pyo
            elif _up_to_date(file_pyc):
                # Use .pyc file.
                arcname = fname = file_pyc
            elif _up_to_date(pycache_pyc):
                # Use the __pycache__/*.pyc file, but write it to the legacy
                # pyc file name in the archive.
                fname = pycache_pyc
                arcname = file_pyc
            elif _up_to_date(pycache_pyo):
                # Use the __pycache__/*.pyo file, but write it to the legacy
                # pyo file name in the archive.
                fname = pycache_pyo
                arcname = file_pyo
            else:
                # Compile py into PEP 3147 pyc file.
                if _compile(file_py):
                    fname = (pycache_pyc if __debug__ else pycache_pyo)
                    arcname = (file_pyc if __debug__ else file_pyo)
                else:
                    fname = arcname = file_py
        else:
            # new mode: use given optimization level
            if self._optimize == 0:
                fname = pycache_pyc
                arcname = file_pyc
            else:
                fname = pycache_pyo
                arcname = file_pyo
            if not _up_to_date(fname):
                if not _compile(file_py, optimize=self._optimize):
                    fname = arcname = file_py
        archivename = os.path.split(arcname)[1]
        if basename:
            archivename = "%s/%s" % (basename, archivename)
        return (fname, archivename)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001656
1657
def main(args=None):
    """Command-line interface: list, test, extract or create a zipfile.

    'args' is the argument list without the program name; when it is
    None, sys.argv[1:] is used.  The first argument selects the action
    (-l, -t, -e or -c).  On a missing or unknown action, or a wrong
    number of arguments, the usage text is printed and the function
    exits through sys.exit(1).

    The archive is closed on every path, including when an operation
    raises.
    """
    import textwrap
    USAGE = textwrap.dedent("""\
        Usage:
            zipfile.py -l zipfile.zip        # Show listing of a zipfile
            zipfile.py -t zipfile.zip        # Test if a zipfile is valid
            zipfile.py -e zipfile.zip target # Extract zipfile into target dir
            zipfile.py -c zipfile.zip src ... # Create zipfile from sources
        """)
    if args is None:
        args = sys.argv[1:]

    if not args or args[0] not in ('-l', '-c', '-e', '-t'):
        print(USAGE)
        sys.exit(1)

    if args[0] == '-l':
        if len(args) != 2:
            print(USAGE)
            sys.exit(1)
        zf = ZipFile(args[1], 'r')
        try:
            zf.printdir()
        finally:
            zf.close()

    elif args[0] == '-t':
        if len(args) != 2:
            print(USAGE)
            sys.exit(1)
        zf = ZipFile(args[1], 'r')
        try:
            badfile = zf.testzip()
        finally:
            zf.close()
        if badfile:
            print("The following enclosed file is corrupted: {!r}".format(badfile))
        print("Done testing")

    elif args[0] == '-e':
        if len(args) != 3:
            print(USAGE)
            sys.exit(1)

        zf = ZipFile(args[1], 'r')
        try:
            out = args[2]
            # NOTE(review): member names are joined onto the target
            # directory without sanitizing; an archive containing
            # absolute or ".." names can write outside 'out'.
            for path in zf.namelist():
                if path.startswith('./'):
                    tgt = os.path.join(out, path[2:])
                else:
                    tgt = os.path.join(out, path)

                tgtdir = os.path.dirname(tgt)
                # An empty dirname means the current directory, which
                # os.makedirs() cannot be asked to create.
                if tgtdir and not os.path.exists(tgtdir):
                    os.makedirs(tgtdir)
                if path.endswith('/'):
                    # Directory entry: creating the directory is all
                    # there is to do; it cannot be opened as a file.
                    continue
                with open(tgt, 'wb') as fp:
                    fp.write(zf.read(path))
        finally:
            zf.close()

    elif args[0] == '-c':
        if len(args) < 3:
            print(USAGE)
            sys.exit(1)

        def addToZip(zf, path, zippath):
            """Add file 'path', or recursively directory 'path', as 'zippath'."""
            if os.path.isfile(path):
                zf.write(path, zippath, ZIP_DEFLATED)
            elif os.path.isdir(path):
                for nm in os.listdir(path):
                    addToZip(zf,
                             os.path.join(path, nm), os.path.join(zippath, nm))
            # else: ignore

        zf = ZipFile(args[1], 'w', allowZip64=True)
        try:
            for src in args[2:]:
                addToZip(zf, src, os.path.basename(src))
        finally:
            zf.close()
1731
# Run the command-line interface only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()