blob: c53b127056c01605db4d4022dc109b0c6a441550 [file] [log] [blame]
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001"""
2Read and write ZIP files.
Guido van Rossumd6ca5462007-05-22 01:29:33 +00003
4XXX references to utf-8 need further investigation.
Thomas Wouters0e3f5912006-08-11 14:57:12 +00005"""
Antoine Pitroua32f9a22010-01-27 21:18:57 +00006import io
Barry Warsaw28a691b2010-04-17 00:19:56 +00007import os
Antoine Pitroua32f9a22010-01-27 21:18:57 +00008import re
Barry Warsaw28a691b2010-04-17 00:19:56 +00009import imp
10import sys
11import time
12import stat
13import shutil
14import struct
15import binascii
16
Guido van Rossum32abe6f2000-03-31 17:30:02 +000017
18try:
Tim Peterse1190062001-01-15 03:34:38 +000019 import zlib # We may need its compression method
Christian Heimesd5e2b6f2008-03-19 21:50:51 +000020 crc32 = zlib.crc32
Guido van Rossum9c673f32001-04-10 15:37:12 +000021except ImportError:
Guido van Rossum32abe6f2000-03-31 17:30:02 +000022 zlib = None
Christian Heimesd5e2b6f2008-03-19 21:50:51 +000023 crc32 = binascii.crc32
Guido van Rossum32abe6f2000-03-31 17:30:02 +000024
Martin v. Löwisf6b16a42012-05-01 07:58:44 +020025try:
26 import bz2 # We may need its compression method
27except ImportError:
28 bz2 = None
29
30__all__ = ["BadZipFile", "BadZipfile", "error",
Georg Brandl5c016782012-05-01 09:00:59 +020031 "ZIP_STORED", "ZIP_DEFLATED", "ZIP_BZIP2",
Georg Brandl4d540882010-10-28 06:42:33 +000032 "is_zipfile", "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile"]
Skip Montanaro40fc1602001-03-01 04:27:19 +000033
Georg Brandl4d540882010-10-28 06:42:33 +000034class BadZipFile(Exception):
Guido van Rossum32abe6f2000-03-31 17:30:02 +000035 pass
Thomas Wouters0e3f5912006-08-11 14:57:12 +000036
37
38class LargeZipFile(Exception):
39 """
40 Raised when writing a zipfile, the zipfile requires ZIP64 extensions
41 and those extensions are disabled.
42 """
43
Georg Brandl4d540882010-10-28 06:42:33 +000044error = BadZipfile = BadZipFile # Pre-3.2 compatibility names
45
Guido van Rossum32abe6f2000-03-31 17:30:02 +000046
Amaury Forgeot d'Arc0c3f8a42009-01-17 16:42:26 +000047ZIP64_LIMIT = (1 << 31) - 1
Martin v. Löwisb09b8442008-07-03 14:13:42 +000048ZIP_FILECOUNT_LIMIT = 1 << 16
49ZIP_MAX_COMMENT = (1 << 16) - 1
Thomas Wouters0e3f5912006-08-11 14:57:12 +000050
Guido van Rossum32abe6f2000-03-31 17:30:02 +000051# constants for Zip file compression methods
52ZIP_STORED = 0
53ZIP_DEFLATED = 8
Martin v. Löwisf6b16a42012-05-01 07:58:44 +020054ZIP_BZIP2 = 12
Guido van Rossum32abe6f2000-03-31 17:30:02 +000055# Other ZIP compression methods not supported
56
Martin v. Löwisf6b16a42012-05-01 07:58:44 +020057DEFAULT_VERSION = 20
58ZIP64_VERSION = 45
59BZIP2_VERSION = 46
Martin v. Löwisd099b562012-05-01 14:08:22 +020060# we recognize (but not necessarily support) all features up to that version
61MAX_EXTRACT_VERSION = 46
Martin v. Löwisf6b16a42012-05-01 07:58:44 +020062
Martin v. Löwisb09b8442008-07-03 14:13:42 +000063# Below are some formats and associated data for reading/writing headers using
64# the struct module. The names and structures of headers/records are those used
65# in the PKWARE description of the ZIP file format:
66# http://www.pkware.com/documents/casestudies/APPNOTE.TXT
67# (URL valid as of January 2008)
Thomas Wouters0e3f5912006-08-11 14:57:12 +000068
Martin v. Löwisb09b8442008-07-03 14:13:42 +000069# The "end of central directory" structure, magic number, size, and indices
70# (section V.I in the format document)
Georg Brandl2ee470f2008-07-16 12:55:28 +000071structEndArchive = b"<4s4H2LH"
72stringEndArchive = b"PK\005\006"
73sizeEndCentDir = struct.calcsize(structEndArchive)
Martin v. Löwisb09b8442008-07-03 14:13:42 +000074
75_ECD_SIGNATURE = 0
76_ECD_DISK_NUMBER = 1
77_ECD_DISK_START = 2
78_ECD_ENTRIES_THIS_DISK = 3
79_ECD_ENTRIES_TOTAL = 4
80_ECD_SIZE = 5
81_ECD_OFFSET = 6
82_ECD_COMMENT_SIZE = 7
83# These last two indices are not part of the structure as defined in the
84# spec, but they are used internally by this module as a convenience
85_ECD_COMMENT = 8
86_ECD_LOCATION = 9
87
88# The "central directory" structure, magic number, size, and indices
89# of entries in the structure (section V.F in the format document)
90structCentralDir = "<4s4B4HL2L5H2L"
Georg Brandl2ee470f2008-07-16 12:55:28 +000091stringCentralDir = b"PK\001\002"
Martin v. Löwisb09b8442008-07-03 14:13:42 +000092sizeCentralDir = struct.calcsize(structCentralDir)
93
Fred Drake3e038e52001-02-28 17:56:26 +000094# indexes of entries in the central directory structure
95_CD_SIGNATURE = 0
96_CD_CREATE_VERSION = 1
97_CD_CREATE_SYSTEM = 2
98_CD_EXTRACT_VERSION = 3
Martin v. Löwisb09b8442008-07-03 14:13:42 +000099_CD_EXTRACT_SYSTEM = 4
Fred Drake3e038e52001-02-28 17:56:26 +0000100_CD_FLAG_BITS = 5
101_CD_COMPRESS_TYPE = 6
102_CD_TIME = 7
103_CD_DATE = 8
104_CD_CRC = 9
105_CD_COMPRESSED_SIZE = 10
106_CD_UNCOMPRESSED_SIZE = 11
107_CD_FILENAME_LENGTH = 12
108_CD_EXTRA_FIELD_LENGTH = 13
109_CD_COMMENT_LENGTH = 14
110_CD_DISK_NUMBER_START = 15
111_CD_INTERNAL_FILE_ATTRIBUTES = 16
112_CD_EXTERNAL_FILE_ATTRIBUTES = 17
113_CD_LOCAL_HEADER_OFFSET = 18
114
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000115# The "local file header" structure, magic number, size, and indices
116# (section V.A in the format document)
117structFileHeader = "<4s2B4HL2L2H"
Georg Brandl2ee470f2008-07-16 12:55:28 +0000118stringFileHeader = b"PK\003\004"
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000119sizeFileHeader = struct.calcsize(structFileHeader)
120
Fred Drake3e038e52001-02-28 17:56:26 +0000121_FH_SIGNATURE = 0
122_FH_EXTRACT_VERSION = 1
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000123_FH_EXTRACT_SYSTEM = 2
Fred Drake3e038e52001-02-28 17:56:26 +0000124_FH_GENERAL_PURPOSE_FLAG_BITS = 3
125_FH_COMPRESSION_METHOD = 4
126_FH_LAST_MOD_TIME = 5
127_FH_LAST_MOD_DATE = 6
128_FH_CRC = 7
129_FH_COMPRESSED_SIZE = 8
130_FH_UNCOMPRESSED_SIZE = 9
131_FH_FILENAME_LENGTH = 10
132_FH_EXTRA_FIELD_LENGTH = 11
133
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000134# The "Zip64 end of central directory locator" structure, magic number, and size
Georg Brandl2ee470f2008-07-16 12:55:28 +0000135structEndArchive64Locator = "<4sLQL"
136stringEndArchive64Locator = b"PK\x06\x07"
137sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator)
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000138
139# The "Zip64 end of central directory" record, magic number, size, and indices
140# (section V.G in the format document)
Georg Brandl2ee470f2008-07-16 12:55:28 +0000141structEndArchive64 = "<4sQ2H2L4Q"
142stringEndArchive64 = b"PK\x06\x06"
143sizeEndCentDir64 = struct.calcsize(structEndArchive64)
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000144
145_CD64_SIGNATURE = 0
146_CD64_DIRECTORY_RECSIZE = 1
147_CD64_CREATE_VERSION = 2
148_CD64_EXTRACT_VERSION = 3
149_CD64_DISK_NUMBER = 4
150_CD64_DISK_NUMBER_START = 5
151_CD64_NUMBER_ENTRIES_THIS_DISK = 6
152_CD64_NUMBER_ENTRIES_TOTAL = 7
153_CD64_DIRECTORY_SIZE = 8
154_CD64_OFFSET_START_CENTDIR = 9
155
Antoine Pitroudb5fe662008-12-27 15:50:40 +0000156def _check_zipfile(fp):
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000157 try:
Antoine Pitroudb5fe662008-12-27 15:50:40 +0000158 if _EndRecData(fp):
159 return True # file has correct magic number
Fred Drake7e473802001-05-11 19:52:57 +0000160 except IOError:
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000161 pass
Guido van Rossum8ca162f2002-04-07 06:36:23 +0000162 return False
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000163
Antoine Pitroudb5fe662008-12-27 15:50:40 +0000164def is_zipfile(filename):
165 """Quickly see if a file is a ZIP file by checking the magic number.
166
167 The filename argument may be a file or file-like object too.
168 """
169 result = False
170 try:
171 if hasattr(filename, "read"):
172 result = _check_zipfile(fp=filename)
173 else:
174 with open(filename, "rb") as fp:
175 result = _check_zipfile(fp)
176 except IOError:
177 pass
178 return result
179
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000180def _EndRecData64(fpin, offset, endrec):
181 """
182 Read the ZIP64 end-of-archive records and use that to update endrec
183 """
Georg Brandl268e4d42010-10-14 06:59:45 +0000184 try:
185 fpin.seek(offset - sizeEndCentDir64Locator, 2)
186 except IOError:
187 # If the seek fails, the file is not large enough to contain a ZIP64
188 # end-of-archive record, so just return the end record we were given.
189 return endrec
190
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000191 data = fpin.read(sizeEndCentDir64Locator)
Georg Brandl2ee470f2008-07-16 12:55:28 +0000192 sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, data)
193 if sig != stringEndArchive64Locator:
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000194 return endrec
195
196 if diskno != 0 or disks != 1:
Éric Araujoae2d8322010-10-28 13:49:17 +0000197 raise BadZipFile("zipfiles that span multiple disks are not supported")
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000198
199 # Assume no 'zip64 extensible data'
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000200 fpin.seek(offset - sizeEndCentDir64Locator - sizeEndCentDir64, 2)
201 data = fpin.read(sizeEndCentDir64)
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000202 sig, sz, create_version, read_version, disk_num, disk_dir, \
203 dircount, dircount2, dirsize, diroffset = \
Georg Brandl2ee470f2008-07-16 12:55:28 +0000204 struct.unpack(structEndArchive64, data)
205 if sig != stringEndArchive64:
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000206 return endrec
207
208 # Update the original endrec using data from the ZIP64 record
Antoine Pitrou9e4fdf42008-09-05 23:43:02 +0000209 endrec[_ECD_SIGNATURE] = sig
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000210 endrec[_ECD_DISK_NUMBER] = disk_num
211 endrec[_ECD_DISK_START] = disk_dir
212 endrec[_ECD_ENTRIES_THIS_DISK] = dircount
213 endrec[_ECD_ENTRIES_TOTAL] = dircount2
214 endrec[_ECD_SIZE] = dirsize
215 endrec[_ECD_OFFSET] = diroffset
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000216 return endrec
217
218
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000219def _EndRecData(fpin):
220 """Return data from the "End of Central Directory" record, or None.
221
222 The data is a list of the nine items in the ZIP "End of central dir"
223 record followed by a tenth item, the file seek offset of this record."""
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000224
225 # Determine file size
226 fpin.seek(0, 2)
227 filesize = fpin.tell()
228
229 # Check to see if this is ZIP file with no archive comment (the
230 # "end of central directory" structure should be the last item in the
231 # file if this is the case).
Amaury Forgeot d'Arcbc347802009-07-28 22:18:57 +0000232 try:
233 fpin.seek(-sizeEndCentDir, 2)
234 except IOError:
235 return None
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000236 data = fpin.read()
Georg Brandl2ee470f2008-07-16 12:55:28 +0000237 if data[0:4] == stringEndArchive and data[-2:] == b"\000\000":
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000238 # the signature is correct and there's no comment, unpack structure
Georg Brandl2ee470f2008-07-16 12:55:28 +0000239 endrec = struct.unpack(structEndArchive, data)
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000240 endrec=list(endrec)
241
242 # Append a blank comment and record start offset
243 endrec.append(b"")
244 endrec.append(filesize - sizeEndCentDir)
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000245
Amaury Forgeot d'Arcd3fb4bb2009-01-18 00:29:02 +0000246 # Try to read the "Zip64 end of central directory" structure
247 return _EndRecData64(fpin, -sizeEndCentDir, endrec)
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000248
249 # Either this is not a ZIP file, or it is a ZIP file with an archive
250 # comment. Search the end of the file for the "end of central directory"
251 # record signature. The comment is the last item in the ZIP file and may be
252 # up to 64K long. It is assumed that the "end of central directory" magic
253 # number does not appear in the comment.
254 maxCommentStart = max(filesize - (1 << 16) - sizeEndCentDir, 0)
255 fpin.seek(maxCommentStart, 0)
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000256 data = fpin.read()
Georg Brandl2ee470f2008-07-16 12:55:28 +0000257 start = data.rfind(stringEndArchive)
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000258 if start >= 0:
259 # found the magic number; attempt to unpack and interpret
260 recData = data[start:start+sizeEndCentDir]
Georg Brandl2ee470f2008-07-16 12:55:28 +0000261 endrec = list(struct.unpack(structEndArchive, recData))
R David Murray4fbb9db2011-06-09 15:50:51 -0400262 commentSize = endrec[_ECD_COMMENT_SIZE] #as claimed by the zip file
263 comment = data[start+sizeEndCentDir:start+sizeEndCentDir+commentSize]
264 endrec.append(comment)
265 endrec.append(maxCommentStart + start)
Amaury Forgeot d'Arcd3fb4bb2009-01-18 00:29:02 +0000266
R David Murray4fbb9db2011-06-09 15:50:51 -0400267 # Try to read the "Zip64 end of central directory" structure
268 return _EndRecData64(fpin, maxCommentStart + start - filesize,
269 endrec)
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000270
271 # Unable to find a valid end of central directory structure
272 return
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000273
Fred Drake484d7352000-10-02 21:14:52 +0000274
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000275class ZipInfo (object):
Fred Drake484d7352000-10-02 21:14:52 +0000276 """Class with attributes describing each file in the ZIP archive."""
277
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000278 __slots__ = (
279 'orig_filename',
280 'filename',
281 'date_time',
282 'compress_type',
283 'comment',
284 'extra',
285 'create_system',
286 'create_version',
287 'extract_version',
288 'reserved',
289 'flag_bits',
290 'volume',
291 'internal_attr',
292 'external_attr',
293 'header_offset',
294 'CRC',
295 'compress_size',
296 'file_size',
Christian Heimesfdab48e2008-01-20 09:06:41 +0000297 '_raw_time',
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000298 )
299
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000300 def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
Greg Ward8e36d282003-06-18 00:53:06 +0000301 self.orig_filename = filename # Original file name in archive
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000302
303 # Terminate the file name at the first null byte. Null bytes in file
304 # names are used as tricks by viruses in archives.
Greg Ward8e36d282003-06-18 00:53:06 +0000305 null_byte = filename.find(chr(0))
306 if null_byte >= 0:
307 filename = filename[0:null_byte]
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000308 # This is used to ensure paths in generated ZIP files always use
309 # forward slashes as the directory separator, as required by the
310 # ZIP format specification.
311 if os.sep != "/" and os.sep in filename:
Greg Ward8e36d282003-06-18 00:53:06 +0000312 filename = filename.replace(os.sep, "/")
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000313
Greg Ward8e36d282003-06-18 00:53:06 +0000314 self.filename = filename # Normalized file name
Tim Peterse1190062001-01-15 03:34:38 +0000315 self.date_time = date_time # year, month, day, hour, min, sec
Senthil Kumaran29fa9d42011-10-20 01:46:00 +0800316
317 if date_time[0] < 1980:
318 raise ValueError('ZIP does not support timestamps before 1980')
319
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000320 # Standard values:
Tim Peterse1190062001-01-15 03:34:38 +0000321 self.compress_type = ZIP_STORED # Type of compression for the file
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000322 self.comment = b"" # Comment for each file
323 self.extra = b"" # ZIP extra data
Martin v. Löwis00756902006-02-05 17:09:41 +0000324 if sys.platform == 'win32':
325 self.create_system = 0 # System which created ZIP archive
326 else:
327 # Assume everything else is unix-y
328 self.create_system = 3 # System which created ZIP archive
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200329 self.create_version = DEFAULT_VERSION # Version which created ZIP archive
330 self.extract_version = DEFAULT_VERSION # Version needed to extract archive
Tim Peterse1190062001-01-15 03:34:38 +0000331 self.reserved = 0 # Must be zero
332 self.flag_bits = 0 # ZIP flag bits
333 self.volume = 0 # Volume number of file header
334 self.internal_attr = 0 # Internal attributes
335 self.external_attr = 0 # External file attributes
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000336 # Other attributes are set by class ZipFile:
Tim Peterse1190062001-01-15 03:34:38 +0000337 # header_offset Byte offset to the file header
Tim Peterse1190062001-01-15 03:34:38 +0000338 # CRC CRC-32 of the uncompressed file
339 # compress_size Size of the compressed file
340 # file_size Size of the uncompressed file
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000341
342 def FileHeader(self):
Fred Drake484d7352000-10-02 21:14:52 +0000343 """Return the per-file header as a string."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000344 dt = self.date_time
345 dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
Tim Peters3caca232001-12-06 06:23:26 +0000346 dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000347 if self.flag_bits & 0x08:
Tim Peterse1190062001-01-15 03:34:38 +0000348 # Set these to zero because we write them after the file data
349 CRC = compress_size = file_size = 0
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000350 else:
Tim Peterse1190062001-01-15 03:34:38 +0000351 CRC = self.CRC
352 compress_size = self.compress_size
353 file_size = self.file_size
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000354
355 extra = self.extra
356
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200357 min_version = 0
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000358 if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
359 # File is larger than what fits into a 4 byte integer,
360 # fall back to the ZIP64 extension
Christian Heimesd5e2b6f2008-03-19 21:50:51 +0000361 fmt = '<HHQQ'
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000362 extra = extra + struct.pack(fmt,
363 1, struct.calcsize(fmt)-4, file_size, compress_size)
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000364 file_size = 0xffffffff
365 compress_size = 0xffffffff
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200366 min_version = ZIP64_VERSION
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000367
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200368 if self.compress_type == ZIP_BZIP2:
369 min_version = max(BZIP2_VERSION, min_version)
370
371 self.extract_version = max(min_version, self.extract_version)
372 self.create_version = max(min_version, self.create_version)
Martin v. Löwis8570f6a2008-05-05 17:44:38 +0000373 filename, flag_bits = self._encodeFilenameFlags()
Georg Brandl2ee470f2008-07-16 12:55:28 +0000374 header = struct.pack(structFileHeader, stringFileHeader,
Martin v. Löwis8570f6a2008-05-05 17:44:38 +0000375 self.extract_version, self.reserved, flag_bits,
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000376 self.compress_type, dostime, dosdate, CRC,
377 compress_size, file_size,
Martin v. Löwis8570f6a2008-05-05 17:44:38 +0000378 len(filename), len(extra))
379 return header + filename + extra
380
381 def _encodeFilenameFlags(self):
382 try:
383 return self.filename.encode('ascii'), self.flag_bits
384 except UnicodeEncodeError:
385 return self.filename.encode('utf-8'), self.flag_bits | 0x800
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000386
387 def _decodeExtra(self):
388 # Try to decode the extra field.
389 extra = self.extra
390 unpack = struct.unpack
391 while extra:
Christian Heimesd5e2b6f2008-03-19 21:50:51 +0000392 tp, ln = unpack('<HH', extra[:4])
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000393 if tp == 1:
394 if ln >= 24:
Christian Heimesd5e2b6f2008-03-19 21:50:51 +0000395 counts = unpack('<QQQ', extra[4:28])
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000396 elif ln == 16:
Christian Heimesd5e2b6f2008-03-19 21:50:51 +0000397 counts = unpack('<QQ', extra[4:20])
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000398 elif ln == 8:
Christian Heimesd5e2b6f2008-03-19 21:50:51 +0000399 counts = unpack('<Q', extra[4:12])
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000400 elif ln == 0:
401 counts = ()
402 else:
Collin Winterce36ad82007-08-30 01:19:48 +0000403 raise RuntimeError("Corrupt extra field %s"%(ln,))
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000404
405 idx = 0
406
407 # ZIP64 extension (large files and/or large archives)
Christian Heimesd5e2b6f2008-03-19 21:50:51 +0000408 if self.file_size in (0xffffffffffffffff, 0xffffffff):
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000409 self.file_size = counts[idx]
410 idx += 1
411
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000412 if self.compress_size == 0xFFFFFFFF:
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000413 self.compress_size = counts[idx]
414 idx += 1
415
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000416 if self.header_offset == 0xffffffff:
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000417 old = self.header_offset
418 self.header_offset = counts[idx]
419 idx+=1
420
421 extra = extra[ln+4:]
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000422
423
Thomas Wouterscf297e42007-02-23 15:07:44 +0000424class _ZipDecrypter:
425 """Class to handle decryption of files stored within a ZIP archive.
426
427 ZIP supports a password-based form of encryption. Even though known
428 plaintext attacks have been found against it, it is still useful
Christian Heimesfdab48e2008-01-20 09:06:41 +0000429 to be able to get data out of such a file.
Thomas Wouterscf297e42007-02-23 15:07:44 +0000430
431 Usage:
432 zd = _ZipDecrypter(mypwd)
433 plain_char = zd(cypher_char)
434 plain_text = map(zd, cypher_text)
435 """
436
437 def _GenerateCRCTable():
438 """Generate a CRC-32 table.
439
440 ZIP encryption uses the CRC32 one-byte primitive for scrambling some
441 internal keys. We noticed that a direct implementation is faster than
442 relying on binascii.crc32().
443 """
444 poly = 0xedb88320
445 table = [0] * 256
446 for i in range(256):
447 crc = i
448 for j in range(8):
449 if crc & 1:
450 crc = ((crc >> 1) & 0x7FFFFFFF) ^ poly
451 else:
452 crc = ((crc >> 1) & 0x7FFFFFFF)
453 table[i] = crc
454 return table
455 crctable = _GenerateCRCTable()
456
457 def _crc32(self, ch, crc):
458 """Compute the CRC32 primitive on one byte."""
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000459 return ((crc >> 8) & 0xffffff) ^ self.crctable[(crc ^ ch) & 0xff]
Thomas Wouterscf297e42007-02-23 15:07:44 +0000460
461 def __init__(self, pwd):
462 self.key0 = 305419896
463 self.key1 = 591751049
464 self.key2 = 878082192
465 for p in pwd:
466 self._UpdateKeys(p)
467
468 def _UpdateKeys(self, c):
469 self.key0 = self._crc32(c, self.key0)
470 self.key1 = (self.key1 + (self.key0 & 255)) & 4294967295
471 self.key1 = (self.key1 * 134775813 + 1) & 4294967295
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000472 self.key2 = self._crc32((self.key1 >> 24) & 255, self.key2)
Thomas Wouterscf297e42007-02-23 15:07:44 +0000473
474 def __call__(self, c):
475 """Decrypt a single character."""
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000476 assert isinstance(c, int)
Thomas Wouterscf297e42007-02-23 15:07:44 +0000477 k = self.key2 | 2
478 c = c ^ (((k * (k^1)) >> 8) & 255)
Thomas Wouterscf297e42007-02-23 15:07:44 +0000479 self._UpdateKeys(c)
480 return c
481
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200482
483def _check_compression(compression):
484 if compression == ZIP_STORED:
485 pass
486 elif compression == ZIP_DEFLATED:
487 if not zlib:
488 raise RuntimeError(
489 "Compression requires the (missing) zlib module")
490 elif compression == ZIP_BZIP2:
491 if not bz2:
492 raise RuntimeError(
493 "Compression requires the (missing) bz2 module")
494 else:
495 raise RuntimeError("That compression method is not supported")
496
497
498def _get_compressor(compress_type):
499 if compress_type == ZIP_DEFLATED:
500 return zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
501 zlib.DEFLATED, -15)
502 elif compress_type == ZIP_BZIP2:
503 return bz2.BZ2Compressor()
504 else:
505 return None
506
507
508def _get_decompressor(compress_type):
Martin v. Löwisb3260f02012-05-01 08:38:01 +0200509 if compress_type == ZIP_STORED:
510 return None
511 elif compress_type == ZIP_DEFLATED:
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200512 return zlib.decompressobj(-15)
513 elif compress_type == ZIP_BZIP2:
514 return bz2.BZ2Decompressor()
515 else:
Martin v. Löwisb3260f02012-05-01 08:38:01 +0200516 unknown_compressors = {
517 1: 'shrink',
518 2: 'reduce',
519 3: 'reduce',
520 4: 'reduce',
521 5: 'reduce',
522 6: 'implode',
523 9: 'enhanced deflate',
524 10: 'implode',
525 14: 'lzma',
526 }
527 descr = unknown_compressors.get(compress_type)
528 if descr:
529 raise NotImplementedError("compression type %d (%s)" % (compress_type, descr))
530 else:
531 raise NotImplementedError("compression type %d" % (compress_type,))
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200532
533
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000534class ZipExtFile(io.BufferedIOBase):
Guido van Rossumd8faa362007-04-27 19:54:29 +0000535 """File-like object for reading an archive member.
536 Is returned by ZipFile.open().
537 """
538
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000539 # Max size supported by decompressor.
540 MAX_N = 1 << 31 - 1
Guido van Rossumd8faa362007-04-27 19:54:29 +0000541
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000542 # Read from compressed files in 4k blocks.
543 MIN_READ_SIZE = 4096
Guido van Rossumd8faa362007-04-27 19:54:29 +0000544
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000545 # Search for universal newlines or line chunks.
546 PATTERN = re.compile(br'^(?P<chunk>[^\r\n]+)|(?P<newline>\n|\r\n?)')
547
Łukasz Langae94980a2010-11-22 23:31:26 +0000548 def __init__(self, fileobj, mode, zipinfo, decrypter=None,
549 close_fileobj=False):
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000550 self._fileobj = fileobj
551 self._decrypter = decrypter
Łukasz Langae94980a2010-11-22 23:31:26 +0000552 self._close_fileobj = close_fileobj
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000553
Ezio Melotti92b47432010-01-28 01:44:41 +0000554 self._compress_type = zipinfo.compress_type
Ezio Melotti92b47432010-01-28 01:44:41 +0000555 self._compress_left = zipinfo.compress_size
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200556 self._left = zipinfo.file_size
Ezio Melotti92b47432010-01-28 01:44:41 +0000557
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200558 self._decompressor = _get_decompressor(self._compress_type)
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000559
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200560 self._eof = False
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000561 self._readbuffer = b''
562 self._offset = 0
563
564 self._universal = 'U' in mode
565 self.newlines = None
566
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000567 # Adjust read size for encrypted files since the first 12 bytes
568 # are for the encryption/password information.
569 if self._decrypter is not None:
570 self._compress_left -= 12
571
572 self.mode = mode
Guido van Rossumd8faa362007-04-27 19:54:29 +0000573 self.name = zipinfo.filename
574
Antoine Pitrou7c8bcb62010-08-12 15:11:50 +0000575 if hasattr(zipinfo, 'CRC'):
576 self._expected_crc = zipinfo.CRC
577 self._running_crc = crc32(b'') & 0xffffffff
578 else:
579 self._expected_crc = None
580
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000581 def readline(self, limit=-1):
582 """Read and return a line from the stream.
Guido van Rossumd8faa362007-04-27 19:54:29 +0000583
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000584 If limit is specified, at most limit bytes will be read.
Guido van Rossumd8faa362007-04-27 19:54:29 +0000585 """
Guido van Rossumd8faa362007-04-27 19:54:29 +0000586
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000587 if not self._universal and limit < 0:
588 # Shortcut common case - newline found in buffer.
589 i = self._readbuffer.find(b'\n', self._offset) + 1
590 if i > 0:
591 line = self._readbuffer[self._offset: i]
592 self._offset = i
593 return line
Guido van Rossumd8faa362007-04-27 19:54:29 +0000594
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000595 if not self._universal:
596 return io.BufferedIOBase.readline(self, limit)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000597
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000598 line = b''
599 while limit < 0 or len(line) < limit:
600 readahead = self.peek(2)
601 if readahead == b'':
602 return line
Guido van Rossumd8faa362007-04-27 19:54:29 +0000603
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000604 #
605 # Search for universal newlines or line chunks.
606 #
607 # The pattern returns either a line chunk or a newline, but not
608 # both. Combined with peek(2), we are assured that the sequence
609 # '\r\n' is always retrieved completely and never split into
610 # separate newlines - '\r', '\n' due to coincidental readaheads.
611 #
612 match = self.PATTERN.search(readahead)
613 newline = match.group('newline')
614 if newline is not None:
615 if self.newlines is None:
616 self.newlines = []
617 if newline not in self.newlines:
618 self.newlines.append(newline)
619 self._offset += len(newline)
620 return line + b'\n'
Guido van Rossumd8faa362007-04-27 19:54:29 +0000621
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000622 chunk = match.group('chunk')
623 if limit >= 0:
624 chunk = chunk[: limit - len(line)]
Guido van Rossumd8faa362007-04-27 19:54:29 +0000625
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000626 self._offset += len(chunk)
627 line += chunk
Guido van Rossumd8faa362007-04-27 19:54:29 +0000628
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000629 return line
630
631 def peek(self, n=1):
632 """Returns buffered bytes without advancing the position."""
633 if n > len(self._readbuffer) - self._offset:
634 chunk = self.read(n)
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200635 if len(chunk) > self._offset:
636 self._readbuffer = chunk + self._readbuffer[self._offset:]
637 self._offset = 0
638 else:
639 self._offset -= len(chunk)
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000640
641 # Return up to 512 bytes to reduce allocation overhead for tight loops.
642 return self._readbuffer[self._offset: self._offset + 512]
643
644 def readable(self):
645 return True
646
647 def read(self, n=-1):
648 """Read and return up to n bytes.
649 If the argument is omitted, None, or negative, data is read and returned until EOF is reached..
Guido van Rossumd8faa362007-04-27 19:54:29 +0000650 """
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200651 if n is None or n < 0:
652 buf = self._readbuffer[self._offset:]
653 self._readbuffer = b''
654 self._offset = 0
655 while not self._eof:
656 buf += self._read1(self.MAX_N)
657 return buf
Guido van Rossumd8faa362007-04-27 19:54:29 +0000658
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200659 n -= len(self._readbuffer) - self._offset
660 if n < 0:
661 buf = self._readbuffer[self._offset:n]
662 self._offset += len(buf)
663 return buf
664
665 buf = self._readbuffer[self._offset:]
666 self._readbuffer = b''
667 self._offset = 0
668 while n > 0 and not self._eof:
669 data = self._read1(n)
670 if n < len(data):
671 self._readbuffer = data
672 self._offset = n
673 buf += data[:n]
674 break
675 buf += data
676 n -= len(data)
677 return buf
678
679 def _update_crc(self, newdata):
Antoine Pitrou7c8bcb62010-08-12 15:11:50 +0000680 # Update the CRC using the given data.
681 if self._expected_crc is None:
682 # No need to compute the CRC if we don't have a reference value
683 return
684 self._running_crc = crc32(newdata, self._running_crc) & 0xffffffff
685 # Check the CRC if we're at the end of the file
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200686 if self._eof and self._running_crc != self._expected_crc:
Georg Brandl4d540882010-10-28 06:42:33 +0000687 raise BadZipFile("Bad CRC-32 for file %r" % self.name)
Antoine Pitrou7c8bcb62010-08-12 15:11:50 +0000688
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000689 def read1(self, n):
690 """Read up to n bytes with at most one read() system call."""
Guido van Rossumd8faa362007-04-27 19:54:29 +0000691
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200692 if n is None or n < 0:
693 buf = self._readbuffer[self._offset:]
694 self._readbuffer = b''
695 self._offset = 0
696 data = self._read1(self.MAX_N)
697 buf += data
698 return buf
Guido van Rossumd8faa362007-04-27 19:54:29 +0000699
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200700 n -= len(self._readbuffer) - self._offset
701 if n < 0:
702 buf = self._readbuffer[self._offset:n]
703 self._offset += len(buf)
704 return buf
705
706 buf = self._readbuffer[self._offset:]
707 self._readbuffer = b''
708 self._offset = 0
709 if n > 0:
710 data = self._read1(n)
711 if n < len(data):
712 self._readbuffer = data
713 self._offset = n
714 data = data[:n]
715 buf += data
716 return buf
717
718 def _read1(self, n):
719 # Read up to n compressed bytes with at most one read() system call,
720 # decrypt and decompress them.
721 if self._eof or n <= 0:
722 return b''
Guido van Rossumd8faa362007-04-27 19:54:29 +0000723
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000724 # Read from file.
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200725 if self._compress_type == ZIP_DEFLATED:
726 ## Handle unconsumed data.
727 data = self._decompressor.unconsumed_tail
728 if n > len(data):
729 data += self._read2(n - len(data))
730 else:
731 data = self._read2(n)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000732
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200733 if self._compress_type == ZIP_STORED:
734 self._eof = self._compress_left <= 0
735 elif self._compress_type == ZIP_DEFLATED:
736 n = max(n, self.MIN_READ_SIZE)
737 data = self._decompressor.decompress(data, n)
738 self._eof = (self._decompressor.eof or
739 self._compress_left <= 0 and
740 not self._decompressor.unconsumed_tail)
741 if self._eof:
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000742 data += self._decompressor.flush()
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200743 else:
744 data = self._decompressor.decompress(data)
745 self._eof = self._decompressor.eof or self._compress_left <= 0
Guido van Rossumd8faa362007-04-27 19:54:29 +0000746
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200747 data = data[:self._left]
748 self._left -= len(data)
749 if self._left <= 0:
750 self._eof = True
751 self._update_crc(data)
752 return data
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000753
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200754 def _read2(self, n):
755 if self._compress_left <= 0:
756 return b''
757
758 n = max(n, self.MIN_READ_SIZE)
759 n = min(n, self._compress_left)
760
761 data = self._fileobj.read(n)
762 self._compress_left -= len(data)
763
764 if self._decrypter is not None:
765 data = bytes(map(self._decrypter, data))
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000766 return data
Guido van Rossumd8faa362007-04-27 19:54:29 +0000767
Łukasz Langae94980a2010-11-22 23:31:26 +0000768 def close(self):
769 try:
770 if self._close_fileobj:
771 self._fileobj.close()
772 finally:
773 super().close()
Guido van Rossumd8faa362007-04-27 19:54:29 +0000774
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000775
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000776class ZipFile:
Tim Petersa19a1682001-03-29 04:36:09 +0000777 """ Class with methods to open, read, write, close, list zip files.
778
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000779 z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=False)
Tim Petersa19a1682001-03-29 04:36:09 +0000780
Fred Drake3d9091e2001-03-26 15:49:24 +0000781 file: Either the path to the file, or a file-like object.
782 If it is a path, the file will be opened and closed by ZipFile.
783 mode: The mode can be either read "r", write "w" or append "a".
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200784 compression: ZIP_STORED (no compression), ZIP_DEFLATED (requires zlib) or
785 ZIP_BZIP2 (requires bz2).
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000786 allowZip64: if True ZipFile will create files with ZIP64 extensions when
787 needed, otherwise it will raise an exception when this would
788 be necessary.
789
Fred Drake3d9091e2001-03-26 15:49:24 +0000790 """
Fred Drake484d7352000-10-02 21:14:52 +0000791
Fred Drake90eac282001-02-28 05:29:34 +0000792 fp = None # Set here since __del__ checks it
793
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000794 def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=False):
Fred Drake484d7352000-10-02 21:14:52 +0000795 """Open the ZIP file with mode read "r", write "w" or append "a"."""
Guido van Rossumb5a755e2007-07-18 18:15:48 +0000796 if mode not in ("r", "w", "a"):
797 raise RuntimeError('ZipFile() requires mode "r", "w", or "a"')
798
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200799 _check_compression(compression)
Guido van Rossumb5a755e2007-07-18 18:15:48 +0000800
801 self._allowZip64 = allowZip64
802 self._didModify = False
Tim Peterse1190062001-01-15 03:34:38 +0000803 self.debug = 0 # Level of printing: 0 through 3
804 self.NameToInfo = {} # Find file info given name
805 self.filelist = [] # List of ZipInfo instances for archive
806 self.compression = compression # Method of compression
Raymond Hettinger2ca7c192005-02-16 09:27:49 +0000807 self.mode = key = mode.replace('b', '')[0]
Thomas Wouterscf297e42007-02-23 15:07:44 +0000808 self.pwd = None
R David Murrayf50b38a2012-04-12 18:44:58 -0400809 self._comment = b''
Tim Petersa19a1682001-03-29 04:36:09 +0000810
Fred Drake3d9091e2001-03-26 15:49:24 +0000811 # Check if we were passed a file-like object
Guido van Rossum3172c5d2007-10-16 18:12:55 +0000812 if isinstance(file, str):
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000813 # No, it's a filename
Fred Drake3d9091e2001-03-26 15:49:24 +0000814 self._filePassed = 0
815 self.filename = file
816 modeDict = {'r' : 'rb', 'w': 'wb', 'a' : 'r+b'}
Thomas Wouterscf297e42007-02-23 15:07:44 +0000817 try:
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000818 self.fp = io.open(file, modeDict[mode])
Thomas Wouterscf297e42007-02-23 15:07:44 +0000819 except IOError:
820 if mode == 'a':
821 mode = key = 'w'
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000822 self.fp = io.open(file, modeDict[mode])
Thomas Wouterscf297e42007-02-23 15:07:44 +0000823 else:
824 raise
Fred Drake3d9091e2001-03-26 15:49:24 +0000825 else:
826 self._filePassed = 1
827 self.fp = file
828 self.filename = getattr(file, 'name', None)
Tim Petersa19a1682001-03-29 04:36:09 +0000829
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000830 if key == 'r':
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000831 self._GetContents()
832 elif key == 'w':
Georg Brandl268e4d42010-10-14 06:59:45 +0000833 # set the modified flag so central directory gets written
834 # even if no files are added to the archive
835 self._didModify = True
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000836 elif key == 'a':
Georg Brandl268e4d42010-10-14 06:59:45 +0000837 try:
838 # See if file is a zip file
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000839 self._RealGetContents()
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000840 # seek to start of directory and overwrite
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000841 self.fp.seek(self.start_dir, 0)
Georg Brandl4d540882010-10-28 06:42:33 +0000842 except BadZipFile:
Georg Brandl268e4d42010-10-14 06:59:45 +0000843 # file is not a zip file, just append
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000844 self.fp.seek(0, 2)
Georg Brandl268e4d42010-10-14 06:59:45 +0000845
846 # set the modified flag so central directory gets written
847 # even if no files are added to the archive
848 self._didModify = True
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000849 else:
Tim Peters7d3bad62001-04-04 18:56:49 +0000850 if not self._filePassed:
851 self.fp.close()
852 self.fp = None
Collin Winterce36ad82007-08-30 01:19:48 +0000853 raise RuntimeError('Mode must be "r", "w" or "a"')
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000854
Ezio Melottifaa6b7f2009-12-30 12:34:59 +0000855 def __enter__(self):
856 return self
857
858 def __exit__(self, type, value, traceback):
859 self.close()
860
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000861 def _GetContents(self):
Tim Peters7d3bad62001-04-04 18:56:49 +0000862 """Read the directory, making sure we close the file if the format
863 is bad."""
864 try:
865 self._RealGetContents()
Georg Brandl4d540882010-10-28 06:42:33 +0000866 except BadZipFile:
Tim Peters7d3bad62001-04-04 18:56:49 +0000867 if not self._filePassed:
868 self.fp.close()
869 self.fp = None
870 raise
871
872 def _RealGetContents(self):
Fred Drake484d7352000-10-02 21:14:52 +0000873 """Read in the table of contents for the ZIP file."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000874 fp = self.fp
Georg Brandl268e4d42010-10-14 06:59:45 +0000875 try:
876 endrec = _EndRecData(fp)
877 except IOError:
Georg Brandl4d540882010-10-28 06:42:33 +0000878 raise BadZipFile("File is not a zip file")
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000879 if not endrec:
Georg Brandl4d540882010-10-28 06:42:33 +0000880 raise BadZipFile("File is not a zip file")
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000881 if self.debug > 1:
Guido van Rossumbe19ed72007-02-09 05:37:30 +0000882 print(endrec)
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000883 size_cd = endrec[_ECD_SIZE] # bytes in central directory
884 offset_cd = endrec[_ECD_OFFSET] # offset of central directory
R David Murrayf50b38a2012-04-12 18:44:58 -0400885 self._comment = endrec[_ECD_COMMENT] # archive comment
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000886
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000887 # "concat" is zero, unless zip was concatenated to another file
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000888 concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
Antoine Pitrou9e4fdf42008-09-05 23:43:02 +0000889 if endrec[_ECD_SIGNATURE] == stringEndArchive64:
890 # If Zip64 extension structures are present, account for them
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000891 concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)
892
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000893 if self.debug > 2:
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000894 inferred = concat + offset_cd
895 print("given, inferred, offset", offset_cd, inferred, concat)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000896 # self.start_dir: Position of start of central directory
897 self.start_dir = offset_cd + concat
898 fp.seek(self.start_dir, 0)
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000899 data = fp.read(size_cd)
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000900 fp = io.BytesIO(data)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000901 total = 0
902 while total < size_cd:
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000903 centdir = fp.read(sizeCentralDir)
Georg Brandl2ee470f2008-07-16 12:55:28 +0000904 if centdir[0:4] != stringCentralDir:
Georg Brandl4d540882010-10-28 06:42:33 +0000905 raise BadZipFile("Bad magic number for central directory")
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000906 centdir = struct.unpack(structCentralDir, centdir)
907 if self.debug > 2:
Guido van Rossumbe19ed72007-02-09 05:37:30 +0000908 print(centdir)
Fred Drake3e038e52001-02-28 17:56:26 +0000909 filename = fp.read(centdir[_CD_FILENAME_LENGTH])
Martin v. Löwis8570f6a2008-05-05 17:44:38 +0000910 flags = centdir[5]
911 if flags & 0x800:
912 # UTF-8 file names extension
913 filename = filename.decode('utf-8')
914 else:
915 # Historical ZIP filename encoding
916 filename = filename.decode('cp437')
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000917 # Create ZipInfo instance to store file information
Martin v. Löwis8570f6a2008-05-05 17:44:38 +0000918 x = ZipInfo(filename)
Fred Drake3e038e52001-02-28 17:56:26 +0000919 x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
920 x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000921 x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000922 (x.create_version, x.create_system, x.extract_version, x.reserved,
923 x.flag_bits, x.compress_type, t, d,
924 x.CRC, x.compress_size, x.file_size) = centdir[1:12]
Martin v. Löwisd099b562012-05-01 14:08:22 +0200925 if x.extract_version > MAX_EXTRACT_VERSION:
926 raise NotImplementedError("zip file version %.1f" %
927 (x.extract_version / 10))
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000928 x.volume, x.internal_attr, x.external_attr = centdir[15:18]
929 # Convert date/time code to (year, month, day, hour, min, sec)
Christian Heimesfdab48e2008-01-20 09:06:41 +0000930 x._raw_time = t
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000931 x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
Fred Drake414ca662000-06-13 18:49:53 +0000932 t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000933
934 x._decodeExtra()
935 x.header_offset = x.header_offset + concat
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000936 self.filelist.append(x)
937 self.NameToInfo[x.filename] = x
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000938
939 # update total bytes read from central directory
940 total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH]
941 + centdir[_CD_EXTRA_FIELD_LENGTH]
942 + centdir[_CD_COMMENT_LENGTH])
943
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000944 if self.debug > 2:
Guido van Rossumbe19ed72007-02-09 05:37:30 +0000945 print("total", total)
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000946
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000947
948 def namelist(self):
Fred Drake484d7352000-10-02 21:14:52 +0000949 """Return a list of file names in the archive."""
Ezio Melotti006917e2012-04-16 21:34:24 -0600950 return [data.filename for data in self.filelist]
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000951
952 def infolist(self):
Fred Drake484d7352000-10-02 21:14:52 +0000953 """Return a list of class ZipInfo instances for files in the
954 archive."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000955 return self.filelist
956
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000957 def printdir(self, file=None):
Fred Drake484d7352000-10-02 21:14:52 +0000958 """Print a table of contents for the zip file."""
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000959 print("%-46s %19s %12s" % ("File Name", "Modified ", "Size"),
960 file=file)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000961 for zinfo in self.filelist:
Guido van Rossum7736b5b2008-01-15 21:44:53 +0000962 date = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time[:6]
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000963 print("%-46s %s %12d" % (zinfo.filename, date, zinfo.file_size),
964 file=file)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000965
966 def testzip(self):
Fred Drake484d7352000-10-02 21:14:52 +0000967 """Read all the files and check the CRC."""
Benjamin Peterson4cd6a952008-08-17 20:23:46 +0000968 chunk_size = 2 ** 20
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000969 for zinfo in self.filelist:
970 try:
Benjamin Peterson4cd6a952008-08-17 20:23:46 +0000971 # Read by chunks, to avoid an OverflowError or a
972 # MemoryError with very large embedded files.
973 f = self.open(zinfo.filename, "r")
974 while f.read(chunk_size): # Check CRC-32
975 pass
Georg Brandl4d540882010-10-28 06:42:33 +0000976 except BadZipFile:
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000977 return zinfo.filename
978
979 def getinfo(self, name):
Fred Drake484d7352000-10-02 21:14:52 +0000980 """Return the instance of ZipInfo given 'name'."""
Guido van Rossumb5a755e2007-07-18 18:15:48 +0000981 info = self.NameToInfo.get(name)
982 if info is None:
983 raise KeyError(
984 'There is no item named %r in the archive' % name)
985
986 return info
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000987
Thomas Wouterscf297e42007-02-23 15:07:44 +0000988 def setpassword(self, pwd):
989 """Set default password for encrypted files."""
R. David Murray8d855d82010-12-21 21:53:37 +0000990 if pwd and not isinstance(pwd, bytes):
991 raise TypeError("pwd: expected bytes, got %s" % type(pwd))
992 if pwd:
993 self.pwd = pwd
994 else:
995 self.pwd = None
Thomas Wouterscf297e42007-02-23 15:07:44 +0000996
R David Murrayf50b38a2012-04-12 18:44:58 -0400997 @property
998 def comment(self):
999 """The comment text associated with the ZIP file."""
1000 return self._comment
1001
1002 @comment.setter
1003 def comment(self, comment):
1004 if not isinstance(comment, bytes):
1005 raise TypeError("comment: expected bytes, got %s" % type(comment))
1006 # check for valid comment length
1007 if len(comment) >= ZIP_MAX_COMMENT:
1008 if self.debug:
1009 print('Archive comment is too long; truncating to %d bytes'
1010 % ZIP_MAX_COMMENT)
1011 comment = comment[:ZIP_MAX_COMMENT]
1012 self._comment = comment
1013 self._didModify = True
1014
Thomas Wouterscf297e42007-02-23 15:07:44 +00001015 def read(self, name, pwd=None):
Fred Drake484d7352000-10-02 21:14:52 +00001016 """Return file bytes (as a string) for name."""
Benjamin Petersond285bdb2010-10-31 17:57:22 +00001017 with self.open(name, "r", pwd) as fp:
1018 return fp.read()
Guido van Rossumd8faa362007-04-27 19:54:29 +00001019
1020 def open(self, name, mode="r", pwd=None):
1021 """Return file-like object for 'name'."""
1022 if mode not in ("r", "U", "rU"):
Collin Winterce36ad82007-08-30 01:19:48 +00001023 raise RuntimeError('open() requires mode "r", "U", or "rU"')
R. David Murray8d855d82010-12-21 21:53:37 +00001024 if pwd and not isinstance(pwd, bytes):
1025 raise TypeError("pwd: expected bytes, got %s" % type(pwd))
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001026 if not self.fp:
Collin Winterce36ad82007-08-30 01:19:48 +00001027 raise RuntimeError(
1028 "Attempt to read ZIP archive that was already closed")
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001029
Guido van Rossumd8faa362007-04-27 19:54:29 +00001030 # Only open a new file for instances where we were not
1031 # given a file object in the constructor
1032 if self._filePassed:
1033 zef_file = self.fp
1034 else:
Guido van Rossumd6ca5462007-05-22 01:29:33 +00001035 zef_file = io.open(self.filename, 'rb')
Guido van Rossumd8faa362007-04-27 19:54:29 +00001036
Georg Brandlb533e262008-05-25 18:19:30 +00001037 # Make sure we have an info object
1038 if isinstance(name, ZipInfo):
1039 # 'name' is already an info object
1040 zinfo = name
1041 else:
1042 # Get info object for name
Łukasz Langaa9f054b2010-11-23 00:15:02 +00001043 try:
1044 zinfo = self.getinfo(name)
1045 except KeyError:
1046 if not self._filePassed:
1047 zef_file.close()
1048 raise
Guido van Rossumd8faa362007-04-27 19:54:29 +00001049 zef_file.seek(zinfo.header_offset, 0)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001050
1051 # Skip the file header:
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001052 fheader = zef_file.read(sizeFileHeader)
Georg Brandl2ee470f2008-07-16 12:55:28 +00001053 if fheader[0:4] != stringFileHeader:
Georg Brandl4d540882010-10-28 06:42:33 +00001054 raise BadZipFile("Bad magic number for file header")
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001055
1056 fheader = struct.unpack(structFileHeader, fheader)
Guido van Rossumd8faa362007-04-27 19:54:29 +00001057 fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001058 if fheader[_FH_EXTRA_FIELD_LENGTH]:
Guido van Rossumd8faa362007-04-27 19:54:29 +00001059 zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001060
Martin v. Löwis2a2ce322012-05-01 08:44:08 +02001061 if zinfo.flag_bits & 0x20:
1062 # Zip 2.7: compressed patched data
1063 raise NotImplementedError("compressed patched data (flag bit 5)")
1064
Georg Brandl5ba11de2011-01-01 10:09:32 +00001065 if zinfo.flag_bits & 0x800:
1066 # UTF-8 filename
1067 fname_str = fname.decode("utf-8")
1068 else:
1069 fname_str = fname.decode("cp437")
1070
1071 if fname_str != zinfo.orig_filename:
Łukasz Langaa9f054b2010-11-23 00:15:02 +00001072 if not self._filePassed:
1073 zef_file.close()
Georg Brandl4d540882010-10-28 06:42:33 +00001074 raise BadZipFile(
Collin Winterce36ad82007-08-30 01:19:48 +00001075 'File name in directory %r and header %r differ.'
1076 % (zinfo.orig_filename, fname))
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001077
Guido van Rossumd8faa362007-04-27 19:54:29 +00001078 # check for encrypted flag & handle password
1079 is_encrypted = zinfo.flag_bits & 0x1
1080 zd = None
Thomas Wouterscf297e42007-02-23 15:07:44 +00001081 if is_encrypted:
Guido van Rossumd8faa362007-04-27 19:54:29 +00001082 if not pwd:
1083 pwd = self.pwd
1084 if not pwd:
Łukasz Langaa9f054b2010-11-23 00:15:02 +00001085 if not self._filePassed:
1086 zef_file.close()
Collin Winterce36ad82007-08-30 01:19:48 +00001087 raise RuntimeError("File %s is encrypted, "
1088 "password required for extraction" % name)
Guido van Rossumd8faa362007-04-27 19:54:29 +00001089
Thomas Wouterscf297e42007-02-23 15:07:44 +00001090 zd = _ZipDecrypter(pwd)
1091 # The first 12 bytes in the cypher stream is an encryption header
1092 # used to strengthen the algorithm. The first 11 bytes are
1093 # completely random, while the 12th contains the MSB of the CRC,
Christian Heimesfdab48e2008-01-20 09:06:41 +00001094 # or the MSB of the file time depending on the header type
Thomas Wouterscf297e42007-02-23 15:07:44 +00001095 # and is used to check the correctness of the password.
R. David Murray8d855d82010-12-21 21:53:37 +00001096 header = zef_file.read(12)
1097 h = list(map(zd, header[0:12]))
Christian Heimesfdab48e2008-01-20 09:06:41 +00001098 if zinfo.flag_bits & 0x8:
1099 # compare against the file type from extended local headers
1100 check_byte = (zinfo._raw_time >> 8) & 0xff
1101 else:
1102 # compare against the CRC otherwise
1103 check_byte = (zinfo.CRC >> 24) & 0xff
1104 if h[11] != check_byte:
Łukasz Langaa9f054b2010-11-23 00:15:02 +00001105 if not self._filePassed:
1106 zef_file.close()
Christian Heimesfdab48e2008-01-20 09:06:41 +00001107 raise RuntimeError("Bad password for file", name)
Guido van Rossumd8faa362007-04-27 19:54:29 +00001108
Łukasz Langaa9f054b2010-11-23 00:15:02 +00001109 return ZipExtFile(zef_file, mode, zinfo, zd,
1110 close_fileobj=not self._filePassed)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001111
Christian Heimes790c8232008-01-07 21:14:23 +00001112 def extract(self, member, path=None, pwd=None):
1113 """Extract a member from the archive to the current working directory,
1114 using its full name. Its file information is extracted as accurately
1115 as possible. `member' may be a filename or a ZipInfo object. You can
1116 specify a different directory using `path'.
1117 """
1118 if not isinstance(member, ZipInfo):
1119 member = self.getinfo(member)
1120
1121 if path is None:
1122 path = os.getcwd()
1123
1124 return self._extract_member(member, path, pwd)
1125
1126 def extractall(self, path=None, members=None, pwd=None):
1127 """Extract all members from the archive to the current working
1128 directory. `path' specifies a different directory to extract to.
1129 `members' is optional and must be a subset of the list returned
1130 by namelist().
1131 """
1132 if members is None:
1133 members = self.namelist()
1134
1135 for zipinfo in members:
1136 self.extract(zipinfo, path, pwd)
1137
1138 def _extract_member(self, member, targetpath, pwd):
1139 """Extract the ZipInfo object 'member' to a physical
1140 file on the path targetpath.
1141 """
1142 # build the destination pathname, replacing
1143 # forward slashes to platform specific separators.
Antoine Pitrou3c33e082009-05-04 21:21:36 +00001144 # Strip trailing path separator, unless it represents the root.
1145 if (targetpath[-1:] in (os.path.sep, os.path.altsep)
1146 and len(os.path.splitdrive(targetpath)[1]) > 1):
Christian Heimes790c8232008-01-07 21:14:23 +00001147 targetpath = targetpath[:-1]
1148
1149 # don't include leading "/" from file name if present
Martin v. Löwis59e47792009-01-24 14:10:07 +00001150 if member.filename[0] == '/':
Christian Heimes790c8232008-01-07 21:14:23 +00001151 targetpath = os.path.join(targetpath, member.filename[1:])
1152 else:
1153 targetpath = os.path.join(targetpath, member.filename)
1154
1155 targetpath = os.path.normpath(targetpath)
1156
1157 # Create all upper directories if necessary.
1158 upperdirs = os.path.dirname(targetpath)
1159 if upperdirs and not os.path.exists(upperdirs):
1160 os.makedirs(upperdirs)
1161
Martin v. Löwis59e47792009-01-24 14:10:07 +00001162 if member.filename[-1] == '/':
Martin v. Löwis70ccd162009-05-24 19:47:22 +00001163 if not os.path.isdir(targetpath):
1164 os.mkdir(targetpath)
Martin v. Löwis59e47792009-01-24 14:10:07 +00001165 return targetpath
1166
Georg Brandlb533e262008-05-25 18:19:30 +00001167 source = self.open(member, pwd=pwd)
Christian Heimes790c8232008-01-07 21:14:23 +00001168 target = open(targetpath, "wb")
1169 shutil.copyfileobj(source, target)
1170 source.close()
1171 target.close()
1172
1173 return targetpath
1174
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001175 def _writecheck(self, zinfo):
Fred Drake484d7352000-10-02 21:14:52 +00001176 """Check for errors before writing a file to the archive."""
Raymond Hettinger54f02222002-06-01 14:18:47 +00001177 if zinfo.filename in self.NameToInfo:
Tim Peterse1190062001-01-15 03:34:38 +00001178 if self.debug: # Warning for duplicate names
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001179 print("Duplicate name:", zinfo.filename)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001180 if self.mode not in ("w", "a"):
Collin Winterce36ad82007-08-30 01:19:48 +00001181 raise RuntimeError('write() requires mode "w" or "a"')
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001182 if not self.fp:
Collin Winterce36ad82007-08-30 01:19:48 +00001183 raise RuntimeError(
1184 "Attempt to write ZIP archive that was already closed")
Martin v. Löwisf6b16a42012-05-01 07:58:44 +02001185 _check_compression(zinfo.compress_type)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001186 if zinfo.file_size > ZIP64_LIMIT:
1187 if not self._allowZip64:
1188 raise LargeZipFile("Filesize would require ZIP64 extensions")
1189 if zinfo.header_offset > ZIP64_LIMIT:
1190 if not self._allowZip64:
Collin Winterce36ad82007-08-30 01:19:48 +00001191 raise LargeZipFile(
1192 "Zipfile size would require ZIP64 extensions")
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001193
1194 def write(self, filename, arcname=None, compress_type=None):
Fred Drake484d7352000-10-02 21:14:52 +00001195 """Put the bytes from filename into the archive under the name
1196 arcname."""
Guido van Rossumb5a755e2007-07-18 18:15:48 +00001197 if not self.fp:
1198 raise RuntimeError(
1199 "Attempt to write to ZIP archive that was already closed")
1200
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001201 st = os.stat(filename)
Martin v. Löwis59e47792009-01-24 14:10:07 +00001202 isdir = stat.S_ISDIR(st.st_mode)
Raymond Hettinger32200ae2002-06-01 19:51:15 +00001203 mtime = time.localtime(st.st_mtime)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001204 date_time = mtime[0:6]
1205 # Create ZipInfo instance to store file information
1206 if arcname is None:
Georg Brandl8f7c54e2006-02-20 08:40:38 +00001207 arcname = filename
1208 arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
1209 while arcname[0] in (os.sep, os.altsep):
1210 arcname = arcname[1:]
Martin v. Löwis59e47792009-01-24 14:10:07 +00001211 if isdir:
1212 arcname += '/'
Georg Brandl8f7c54e2006-02-20 08:40:38 +00001213 zinfo = ZipInfo(arcname, date_time)
Guido van Rossume2a383d2007-01-15 16:59:06 +00001214 zinfo.external_attr = (st[0] & 0xFFFF) << 16 # Unix attributes
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001215 if compress_type is None:
Tim Peterse1190062001-01-15 03:34:38 +00001216 zinfo.compress_type = self.compression
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001217 else:
Tim Peterse1190062001-01-15 03:34:38 +00001218 zinfo.compress_type = compress_type
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001219
1220 zinfo.file_size = st.st_size
Finn Bock03a3bb82001-09-05 18:40:33 +00001221 zinfo.flag_bits = 0x00
Tim Peterse1190062001-01-15 03:34:38 +00001222 zinfo.header_offset = self.fp.tell() # Start of header bytes
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001223
1224 self._writecheck(zinfo)
1225 self._didModify = True
Martin v. Löwis59e47792009-01-24 14:10:07 +00001226
1227 if isdir:
1228 zinfo.file_size = 0
1229 zinfo.compress_size = 0
1230 zinfo.CRC = 0
1231 self.filelist.append(zinfo)
1232 self.NameToInfo[zinfo.filename] = zinfo
1233 self.fp.write(zinfo.FileHeader())
1234 return
1235
Martin v. Löwisf6b16a42012-05-01 07:58:44 +02001236 cmpr = _get_compressor(zinfo.compress_type)
Benjamin Petersonfa0d7032009-06-01 22:42:33 +00001237 with open(filename, "rb") as fp:
1238 # Must overwrite CRC and sizes with correct data later
1239 zinfo.CRC = CRC = 0
1240 zinfo.compress_size = compress_size = 0
1241 zinfo.file_size = file_size = 0
1242 self.fp.write(zinfo.FileHeader())
Benjamin Petersonfa0d7032009-06-01 22:42:33 +00001243 while 1:
1244 buf = fp.read(1024 * 8)
1245 if not buf:
1246 break
1247 file_size = file_size + len(buf)
1248 CRC = crc32(buf, CRC) & 0xffffffff
1249 if cmpr:
1250 buf = cmpr.compress(buf)
1251 compress_size = compress_size + len(buf)
1252 self.fp.write(buf)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001253 if cmpr:
1254 buf = cmpr.flush()
1255 compress_size = compress_size + len(buf)
1256 self.fp.write(buf)
1257 zinfo.compress_size = compress_size
1258 else:
1259 zinfo.compress_size = file_size
1260 zinfo.CRC = CRC
1261 zinfo.file_size = file_size
Finn Bock03a3bb82001-09-05 18:40:33 +00001262 # Seek backwards and write CRC and file sizes
Tim Petersb64bec32001-09-18 02:26:39 +00001263 position = self.fp.tell() # Preserve current position in file
Finn Bock03a3bb82001-09-05 18:40:33 +00001264 self.fp.seek(zinfo.header_offset + 14, 0)
Christian Heimesd5e2b6f2008-03-19 21:50:51 +00001265 self.fp.write(struct.pack("<LLL", zinfo.CRC, zinfo.compress_size,
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001266 zinfo.file_size))
Finn Bock03a3bb82001-09-05 18:40:33 +00001267 self.fp.seek(position, 0)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001268 self.filelist.append(zinfo)
1269 self.NameToInfo[zinfo.filename] = zinfo
1270
Ronald Oussorenee5c8852010-02-07 20:24:02 +00001271 def writestr(self, zinfo_or_arcname, data, compress_type=None):
Guido van Rossum85825dc2007-08-27 17:03:28 +00001272 """Write a file into the archive. The contents is 'data', which
1273 may be either a 'str' or a 'bytes' instance; if it is a 'str',
1274 it is encoded as UTF-8 first.
1275 'zinfo_or_arcname' is either a ZipInfo instance or
Just van Rossumb083cb32002-12-12 12:23:32 +00001276 the name of the file in the archive."""
Guido van Rossum85825dc2007-08-27 17:03:28 +00001277 if isinstance(data, str):
1278 data = data.encode("utf-8")
Just van Rossumb083cb32002-12-12 12:23:32 +00001279 if not isinstance(zinfo_or_arcname, ZipInfo):
1280 zinfo = ZipInfo(filename=zinfo_or_arcname,
Guido van Rossum7736b5b2008-01-15 21:44:53 +00001281 date_time=time.localtime(time.time())[:6])
Just van Rossumb083cb32002-12-12 12:23:32 +00001282 zinfo.compress_type = self.compression
Antoine Pitrou6e1df8d2008-07-25 19:58:18 +00001283 zinfo.external_attr = 0o600 << 16
Just van Rossumb083cb32002-12-12 12:23:32 +00001284 else:
1285 zinfo = zinfo_or_arcname
Guido van Rossumb5a755e2007-07-18 18:15:48 +00001286
1287 if not self.fp:
1288 raise RuntimeError(
1289 "Attempt to write to ZIP archive that was already closed")
1290
Guido van Rossum85825dc2007-08-27 17:03:28 +00001291 zinfo.file_size = len(data) # Uncompressed size
1292 zinfo.header_offset = self.fp.tell() # Start of header data
Ronald Oussorenee5c8852010-02-07 20:24:02 +00001293 if compress_type is not None:
1294 zinfo.compress_type = compress_type
1295
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001296 self._writecheck(zinfo)
1297 self._didModify = True
Christian Heimesd5e2b6f2008-03-19 21:50:51 +00001298 zinfo.CRC = crc32(data) & 0xffffffff # CRC-32 checksum
Martin v. Löwisf6b16a42012-05-01 07:58:44 +02001299 co = _get_compressor(zinfo.compress_type)
1300 if co:
Guido van Rossum85825dc2007-08-27 17:03:28 +00001301 data = co.compress(data) + co.flush()
1302 zinfo.compress_size = len(data) # Compressed size
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001303 else:
1304 zinfo.compress_size = zinfo.file_size
Guido van Rossum85825dc2007-08-27 17:03:28 +00001305 zinfo.header_offset = self.fp.tell() # Start of header data
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001306 self.fp.write(zinfo.FileHeader())
Guido van Rossum85825dc2007-08-27 17:03:28 +00001307 self.fp.write(data)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001308 self.fp.flush()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001309 if zinfo.flag_bits & 0x08:
Tim Peterse1190062001-01-15 03:34:38 +00001310 # Write CRC and file sizes after the file data
Gregory P. Smithe88749b2009-06-26 08:05:13 +00001311 self.fp.write(struct.pack("<LLL", zinfo.CRC, zinfo.compress_size,
Tim Peterse1190062001-01-15 03:34:38 +00001312 zinfo.file_size))
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001313 self.filelist.append(zinfo)
1314 self.NameToInfo[zinfo.filename] = zinfo
1315
1316 def __del__(self):
Fred Drake484d7352000-10-02 21:14:52 +00001317 """Call the "close()" method in case the user forgot."""
Tim Petersd15f8bb2001-11-28 23:16:40 +00001318 self.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001319
1320 def close(self):
Fred Drake484d7352000-10-02 21:14:52 +00001321 """Close the file, and for mode "w" and "a" write the ending
1322 records."""
Tim Petersd15f8bb2001-11-28 23:16:40 +00001323 if self.fp is None:
1324 return
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001325
1326 if self.mode in ("w", "a") and self._didModify: # write ending records
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001327 count = 0
1328 pos1 = self.fp.tell()
Tim Peterse1190062001-01-15 03:34:38 +00001329 for zinfo in self.filelist: # write central directory
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001330 count = count + 1
1331 dt = zinfo.date_time
1332 dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
Tim Peters3caca232001-12-06 06:23:26 +00001333 dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001334 extra = []
1335 if zinfo.file_size > ZIP64_LIMIT \
1336 or zinfo.compress_size > ZIP64_LIMIT:
1337 extra.append(zinfo.file_size)
1338 extra.append(zinfo.compress_size)
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001339 file_size = 0xffffffff
1340 compress_size = 0xffffffff
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001341 else:
1342 file_size = zinfo.file_size
1343 compress_size = zinfo.compress_size
1344
1345 if zinfo.header_offset > ZIP64_LIMIT:
1346 extra.append(zinfo.header_offset)
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001347 header_offset = 0xffffffff
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001348 else:
1349 header_offset = zinfo.header_offset
1350
1351 extra_data = zinfo.extra
Martin v. Löwisf6b16a42012-05-01 07:58:44 +02001352 min_version = 0
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001353 if extra:
1354 # Append a ZIP64 field to the extra's
1355 extra_data = struct.pack(
Christian Heimesd5e2b6f2008-03-19 21:50:51 +00001356 '<HH' + 'Q'*len(extra),
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001357 1, 8*len(extra), *extra) + extra_data
1358
Martin v. Löwisf6b16a42012-05-01 07:58:44 +02001359 min_version = ZIP64_VERSION
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001360
Martin v. Löwisf6b16a42012-05-01 07:58:44 +02001361 if zinfo.compress_type == ZIP_BZIP2:
1362 min_version = max(BZIP2_VERSION, min_version)
1363
1364 extract_version = max(min_version, zinfo.extract_version)
1365 create_version = max(min_version, zinfo.create_version)
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001366 try:
1367 filename, flag_bits = zinfo._encodeFilenameFlags()
1368 centdir = struct.pack(structCentralDir,
Ezio Melottiaf30d2e2009-09-25 21:35:24 +00001369 stringCentralDir, create_version,
1370 zinfo.create_system, extract_version, zinfo.reserved,
1371 flag_bits, zinfo.compress_type, dostime, dosdate,
1372 zinfo.CRC, compress_size, file_size,
1373 len(filename), len(extra_data), len(zinfo.comment),
1374 0, zinfo.internal_attr, zinfo.external_attr,
1375 header_offset)
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001376 except DeprecationWarning:
Ezio Melottiaf30d2e2009-09-25 21:35:24 +00001377 print((structCentralDir, stringCentralDir, create_version,
1378 zinfo.create_system, extract_version, zinfo.reserved,
1379 zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
1380 zinfo.CRC, compress_size, file_size,
1381 len(zinfo.filename), len(extra_data), len(zinfo.comment),
1382 0, zinfo.internal_attr, zinfo.external_attr,
1383 header_offset), file=sys.stderr)
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001384 raise
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001385 self.fp.write(centdir)
Martin v. Löwis8570f6a2008-05-05 17:44:38 +00001386 self.fp.write(filename)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001387 self.fp.write(extra_data)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001388 self.fp.write(zinfo.comment)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001389
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001390 pos2 = self.fp.tell()
1391 # Write end-of-zip-archive record
Amaury Forgeot d'Arc0c3f8a42009-01-17 16:42:26 +00001392 centDirCount = count
1393 centDirSize = pos2 - pos1
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001394 centDirOffset = pos1
Amaury Forgeot d'Arc0c3f8a42009-01-17 16:42:26 +00001395 if (centDirCount >= ZIP_FILECOUNT_LIMIT or
1396 centDirOffset > ZIP64_LIMIT or
1397 centDirSize > ZIP64_LIMIT):
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001398 # Need to write the ZIP64 end-of-archive records
1399 zip64endrec = struct.pack(
Georg Brandl2ee470f2008-07-16 12:55:28 +00001400 structEndArchive64, stringEndArchive64,
Amaury Forgeot d'Arc0c3f8a42009-01-17 16:42:26 +00001401 44, 45, 45, 0, 0, centDirCount, centDirCount,
1402 centDirSize, centDirOffset)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001403 self.fp.write(zip64endrec)
1404
1405 zip64locrec = struct.pack(
Georg Brandl2ee470f2008-07-16 12:55:28 +00001406 structEndArchive64Locator,
1407 stringEndArchive64Locator, 0, pos2, 1)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001408 self.fp.write(zip64locrec)
Amaury Forgeot d'Arc0c3f8a42009-01-17 16:42:26 +00001409 centDirCount = min(centDirCount, 0xFFFF)
1410 centDirSize = min(centDirSize, 0xFFFFFFFF)
1411 centDirOffset = min(centDirOffset, 0xFFFFFFFF)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001412
Georg Brandl2ee470f2008-07-16 12:55:28 +00001413 endrec = struct.pack(structEndArchive, stringEndArchive,
Amaury Forgeot d'Arc0c3f8a42009-01-17 16:42:26 +00001414 0, 0, centDirCount, centDirCount,
R David Murrayf50b38a2012-04-12 18:44:58 -04001415 centDirSize, centDirOffset, len(self._comment))
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001416 self.fp.write(endrec)
R David Murrayf50b38a2012-04-12 18:44:58 -04001417 self.fp.write(self._comment)
Guido van Rossumf85af612001-04-14 16:45:14 +00001418 self.fp.flush()
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001419
Fred Drake3d9091e2001-03-26 15:49:24 +00001420 if not self._filePassed:
1421 self.fp.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001422 self.fp = None
1423
1424
1425class PyZipFile(ZipFile):
Fred Drake484d7352000-10-02 21:14:52 +00001426 """Class to create ZIP archives with Python library files and packages."""
1427
Georg Brandl8334fd92010-12-04 10:26:46 +00001428 def __init__(self, file, mode="r", compression=ZIP_STORED,
1429 allowZip64=False, optimize=-1):
1430 ZipFile.__init__(self, file, mode=mode, compression=compression,
1431 allowZip64=allowZip64)
1432 self._optimize = optimize
1433
Georg Brandlfe991052009-09-16 15:54:04 +00001434 def writepy(self, pathname, basename=""):
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001435 """Add all files from "pathname" to the ZIP archive.
1436
Fred Drake484d7352000-10-02 21:14:52 +00001437 If pathname is a package directory, search the directory and
1438 all package subdirectories recursively for all *.py and enter
1439 the modules into the archive. If pathname is a plain
1440 directory, listdir *.py and enter all modules. Else, pathname
1441 must be a Python *.py file and the module will be put into the
1442 archive. Added modules are always module.pyo or module.pyc.
1443 This method will compile the module.py into module.pyc if
1444 necessary.
1445 """
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001446 dir, name = os.path.split(pathname)
1447 if os.path.isdir(pathname):
1448 initname = os.path.join(pathname, "__init__.py")
1449 if os.path.isfile(initname):
1450 # This is a package directory, add it
1451 if basename:
1452 basename = "%s/%s" % (basename, name)
1453 else:
1454 basename = name
1455 if self.debug:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001456 print("Adding package in", pathname, "as", basename)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001457 fname, arcname = self._get_codename(initname[0:-3], basename)
1458 if self.debug:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001459 print("Adding", arcname)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001460 self.write(fname, arcname)
1461 dirlist = os.listdir(pathname)
1462 dirlist.remove("__init__.py")
1463 # Add all *.py files and package subdirectories
1464 for filename in dirlist:
1465 path = os.path.join(pathname, filename)
1466 root, ext = os.path.splitext(filename)
1467 if os.path.isdir(path):
1468 if os.path.isfile(os.path.join(path, "__init__.py")):
1469 # This is a package directory, add it
1470 self.writepy(path, basename) # Recursive call
1471 elif ext == ".py":
1472 fname, arcname = self._get_codename(path[0:-3],
1473 basename)
1474 if self.debug:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001475 print("Adding", arcname)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001476 self.write(fname, arcname)
1477 else:
1478 # This is NOT a package directory, add its files at top level
1479 if self.debug:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001480 print("Adding files from directory", pathname)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001481 for filename in os.listdir(pathname):
1482 path = os.path.join(pathname, filename)
1483 root, ext = os.path.splitext(filename)
1484 if ext == ".py":
1485 fname, arcname = self._get_codename(path[0:-3],
1486 basename)
1487 if self.debug:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001488 print("Adding", arcname)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001489 self.write(fname, arcname)
1490 else:
1491 if pathname[-3:] != ".py":
Collin Winterce36ad82007-08-30 01:19:48 +00001492 raise RuntimeError(
1493 'Files added with writepy() must end with ".py"')
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001494 fname, arcname = self._get_codename(pathname[0:-3], basename)
1495 if self.debug:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001496 print("Adding file", arcname)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001497 self.write(fname, arcname)
1498
1499 def _get_codename(self, pathname, basename):
1500 """Return (filename, archivename) for the path.
1501
Fred Drake484d7352000-10-02 21:14:52 +00001502 Given a module name path, return the correct file path and
1503 archive name, compiling if necessary. For example, given
1504 /python/lib/string, return (/python/lib/string.pyc, string).
1505 """
Georg Brandl8334fd92010-12-04 10:26:46 +00001506 def _compile(file, optimize=-1):
1507 import py_compile
1508 if self.debug:
1509 print("Compiling", file)
1510 try:
1511 py_compile.compile(file, doraise=True, optimize=optimize)
1512 except py_compile.PyCompileError as error:
1513 print(err.msg)
1514 return False
1515 return True
1516
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001517 file_py = pathname + ".py"
1518 file_pyc = pathname + ".pyc"
1519 file_pyo = pathname + ".pyo"
Barry Warsaw28a691b2010-04-17 00:19:56 +00001520 pycache_pyc = imp.cache_from_source(file_py, True)
1521 pycache_pyo = imp.cache_from_source(file_py, False)
Georg Brandl8334fd92010-12-04 10:26:46 +00001522 if self._optimize == -1:
1523 # legacy mode: use whatever file is present
1524 if (os.path.isfile(file_pyo) and
1525 os.stat(file_pyo).st_mtime >= os.stat(file_py).st_mtime):
1526 # Use .pyo file.
1527 arcname = fname = file_pyo
1528 elif (os.path.isfile(file_pyc) and
1529 os.stat(file_pyc).st_mtime >= os.stat(file_py).st_mtime):
1530 # Use .pyc file.
1531 arcname = fname = file_pyc
1532 elif (os.path.isfile(pycache_pyc) and
1533 os.stat(pycache_pyc).st_mtime >= os.stat(file_py).st_mtime):
1534 # Use the __pycache__/*.pyc file, but write it to the legacy pyc
1535 # file name in the archive.
1536 fname = pycache_pyc
1537 arcname = file_pyc
1538 elif (os.path.isfile(pycache_pyo) and
1539 os.stat(pycache_pyo).st_mtime >= os.stat(file_py).st_mtime):
1540 # Use the __pycache__/*.pyo file, but write it to the legacy pyo
1541 # file name in the archive.
1542 fname = pycache_pyo
1543 arcname = file_pyo
Barry Warsaw28a691b2010-04-17 00:19:56 +00001544 else:
Georg Brandl8334fd92010-12-04 10:26:46 +00001545 # Compile py into PEP 3147 pyc file.
1546 if _compile(file_py):
1547 fname = (pycache_pyc if __debug__ else pycache_pyo)
1548 arcname = (file_pyc if __debug__ else file_pyo)
1549 else:
1550 fname = arcname = file_py
1551 else:
1552 # new mode: use given optimization level
1553 if self._optimize == 0:
1554 fname = pycache_pyc
1555 arcname = file_pyc
1556 else:
1557 fname = pycache_pyo
1558 arcname = file_pyo
1559 if not (os.path.isfile(fname) and
1560 os.stat(fname).st_mtime >= os.stat(file_py).st_mtime):
1561 if not _compile(file_py, optimize=self._optimize):
1562 fname = arcname = file_py
Barry Warsaw28a691b2010-04-17 00:19:56 +00001563 archivename = os.path.split(arcname)[1]
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001564 if basename:
1565 archivename = "%s/%s" % (basename, archivename)
1566 return (fname, archivename)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001567
1568
1569def main(args = None):
1570 import textwrap
1571 USAGE=textwrap.dedent("""\
1572 Usage:
1573 zipfile.py -l zipfile.zip # Show listing of a zipfile
1574 zipfile.py -t zipfile.zip # Test if a zipfile is valid
1575 zipfile.py -e zipfile.zip target # Extract zipfile into target dir
1576 zipfile.py -c zipfile.zip src ... # Create zipfile from sources
1577 """)
1578 if args is None:
1579 args = sys.argv[1:]
1580
1581 if not args or args[0] not in ('-l', '-c', '-e', '-t'):
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001582 print(USAGE)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001583 sys.exit(1)
1584
1585 if args[0] == '-l':
1586 if len(args) != 2:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001587 print(USAGE)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001588 sys.exit(1)
1589 zf = ZipFile(args[1], 'r')
1590 zf.printdir()
1591 zf.close()
1592
1593 elif args[0] == '-t':
1594 if len(args) != 2:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001595 print(USAGE)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001596 sys.exit(1)
1597 zf = ZipFile(args[1], 'r')
Antoine Pitrou7c8bcb62010-08-12 15:11:50 +00001598 badfile = zf.testzip()
1599 if badfile:
1600 print("The following enclosed file is corrupted: {!r}".format(badfile))
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001601 print("Done testing")
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001602
1603 elif args[0] == '-e':
1604 if len(args) != 3:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001605 print(USAGE)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001606 sys.exit(1)
1607
1608 zf = ZipFile(args[1], 'r')
1609 out = args[2]
1610 for path in zf.namelist():
1611 if path.startswith('./'):
1612 tgt = os.path.join(out, path[2:])
1613 else:
1614 tgt = os.path.join(out, path)
1615
1616 tgtdir = os.path.dirname(tgt)
1617 if not os.path.exists(tgtdir):
1618 os.makedirs(tgtdir)
Benjamin Petersonfa0d7032009-06-01 22:42:33 +00001619 with open(tgt, 'wb') as fp:
1620 fp.write(zf.read(path))
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001621 zf.close()
1622
1623 elif args[0] == '-c':
1624 if len(args) < 3:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001625 print(USAGE)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001626 sys.exit(1)
1627
1628 def addToZip(zf, path, zippath):
1629 if os.path.isfile(path):
1630 zf.write(path, zippath, ZIP_DEFLATED)
1631 elif os.path.isdir(path):
1632 for nm in os.listdir(path):
1633 addToZip(zf,
1634 os.path.join(path, nm), os.path.join(zippath, nm))
1635 # else: ignore
1636
1637 zf = ZipFile(args[1], 'w', allowZip64=True)
1638 for src in args[2:]:
1639 addToZip(zf, src, os.path.basename(src))
1640
1641 zf.close()
1642
1643if __name__ == "__main__":
1644 main()