blob: 59a86e2415f12d073c3680746b56ca16f37e8312 [file] [log] [blame]
"""
Read and write ZIP files.

XXX references to utf-8 need further investigation.
"""
Christian Heimes790c8232008-01-07 21:14:23 +00006import struct, os, time, sys, shutil
Guido van Rossum68937b42007-05-18 00:51:22 +00007import binascii, io
Guido van Rossum32abe6f2000-03-31 17:30:02 +00008
9try:
Tim Peterse1190062001-01-15 03:34:38 +000010 import zlib # We may need its compression method
Christian Heimesd5e2b6f2008-03-19 21:50:51 +000011 crc32 = zlib.crc32
Guido van Rossum9c673f32001-04-10 15:37:12 +000012except ImportError:
Guido van Rossum32abe6f2000-03-31 17:30:02 +000013 zlib = None
Christian Heimesd5e2b6f2008-03-19 21:50:51 +000014 crc32 = binascii.crc32
Guido van Rossum32abe6f2000-03-31 17:30:02 +000015
Skip Montanaro40fc1602001-03-01 04:27:19 +000016__all__ = ["BadZipfile", "error", "ZIP_STORED", "ZIP_DEFLATED", "is_zipfile",
Thomas Wouters0e3f5912006-08-11 14:57:12 +000017 "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile" ]
Skip Montanaro40fc1602001-03-01 04:27:19 +000018
class BadZipfile(Exception):
    """The exception raised by this module for ZIP archives it cannot
    process (for example, archives that span multiple disks)."""
    pass
Thomas Wouters0e3f5912006-08-11 14:57:12 +000021
22
class LargeZipFile(Exception):
    """
    Raised when writing a zipfile that requires ZIP64 extensions
    while those extensions are disabled.
    """
28
Tim Peterse1190062001-01-15 03:34:38 +000029error = BadZipfile # The exception raised by this module
Guido van Rossum32abe6f2000-03-31 17:30:02 +000030
# Size/count thresholds used by this module.
ZIP64_LIMIT = (1 << 31) - 1
ZIP_FILECOUNT_LIMIT = 1 << 16
ZIP_MAX_COMMENT = (1 << 16) - 1

# constants for Zip file compression methods
ZIP_STORED = 0
ZIP_DEFLATED = 8
# Other ZIP compression methods not supported

# Below are some formats and associated data for reading/writing headers using
# the struct module.  The names and structures of headers/records are those used
# in the PKWARE description of the ZIP file format:
#     http://www.pkware.com/documents/casestudies/APPNOTE.TXT
# (URL valid as of January 2008)
#
# Each structure is defined exactly once below, immediately followed by the
# indices of its fields in the tuple returned by struct.unpack().

# The "end of central directory" structure, magic number, size, and indices
# (section V.I in the format document)
structEndCentDir = b"<4s4H2LH"
magicEndCentDir = b"PK\005\006"
sizeEndCentDir = struct.calcsize(structEndCentDir)

_ECD_SIGNATURE = 0
_ECD_DISK_NUMBER = 1
_ECD_DISK_START = 2
_ECD_ENTRIES_THIS_DISK = 3
_ECD_ENTRIES_TOTAL = 4
_ECD_SIZE = 5
_ECD_OFFSET = 6
_ECD_COMMENT_SIZE = 7
# These last two indices are not part of the structure as defined in the
# spec, but they are used internally by this module as a convenience
_ECD_COMMENT = 8
_ECD_LOCATION = 9

# The "central directory" structure, magic number, size, and indices
# of entries in the structure (section V.F in the format document)
structCentralDir = "<4s4B4HL2L5H2L"
magicCentralDir = b"PK\001\002"
sizeCentralDir = struct.calcsize(structCentralDir)

# indexes of entries in the central directory structure
_CD_SIGNATURE = 0
_CD_CREATE_VERSION = 1
_CD_CREATE_SYSTEM = 2
_CD_EXTRACT_VERSION = 3
_CD_EXTRACT_SYSTEM = 4
_CD_FLAG_BITS = 5
_CD_COMPRESS_TYPE = 6
_CD_TIME = 7
_CD_DATE = 8
_CD_CRC = 9
_CD_COMPRESSED_SIZE = 10
_CD_UNCOMPRESSED_SIZE = 11
_CD_FILENAME_LENGTH = 12
_CD_EXTRA_FIELD_LENGTH = 13
_CD_COMMENT_LENGTH = 14
_CD_DISK_NUMBER_START = 15
_CD_INTERNAL_FILE_ATTRIBUTES = 16
_CD_EXTERNAL_FILE_ATTRIBUTES = 17
_CD_LOCAL_HEADER_OFFSET = 18

# The "local file header" structure, magic number, size, and indices
# (section V.A in the format document)
structFileHeader = "<4s2B4HL2L2H"
magicFileHeader = b"PK\003\004"
sizeFileHeader = struct.calcsize(structFileHeader)

_FH_SIGNATURE = 0
_FH_EXTRACT_VERSION = 1
_FH_EXTRACT_SYSTEM = 2
_FH_GENERAL_PURPOSE_FLAG_BITS = 3
_FH_COMPRESSION_METHOD = 4
_FH_LAST_MOD_TIME = 5
_FH_LAST_MOD_DATE = 6
_FH_CRC = 7
_FH_COMPRESSED_SIZE = 8
_FH_UNCOMPRESSED_SIZE = 9
_FH_FILENAME_LENGTH = 10
_FH_EXTRA_FIELD_LENGTH = 11

# The "Zip64 end of central directory locator" structure, magic number, and size
structEndCentDir64Locator = "<4sLQL"
magicEndCentDir64Locator = b"PK\x06\x07"
sizeEndCentDir64Locator = struct.calcsize(structEndCentDir64Locator)

# The "Zip64 end of central directory" record, magic number, size, and indices
# (section V.G in the format document)
structEndCentDir64 = "<4sQ2H2L4Q"
magicEndCentDir64 = b"PK\x06\x06"
sizeEndCentDir64 = struct.calcsize(structEndCentDir64)

_CD64_SIGNATURE = 0
_CD64_DIRECTORY_RECSIZE = 1
_CD64_CREATE_VERSION = 2
_CD64_EXTRACT_VERSION = 3
_CD64_DISK_NUMBER = 4
_CD64_DISK_NUMBER_START = 5
_CD64_NUMBER_ENTRIES_THIS_DISK = 6
_CD64_NUMBER_ENTRIES_TOTAL = 7
_CD64_DIRECTORY_SIZE = 8
_CD64_OFFSET_START_CENTDIR = 9
160
def is_zipfile(filename):
    """Quickly see if file is a ZIP file by checking the magic number.

    Returns True when an "end of central directory" record can be located
    in the file named by *filename*.  Returns False otherwise, including
    when opening or reading the file raises IOError.
    """
    try:
        # The with-block closes the file on every path, including when
        # _EndRecData() raises IOError part-way through.
        with io.open(filename, "rb") as fpin:
            endrec = _EndRecData(fpin)
    except IOError:
        return False
    # _EndRecData() yields None when no valid record was found.
    return bool(endrec)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000172
def _EndRecData64(fpin, offset, endrec):
    """
    Read the ZIP64 end-of-archive records and use that to update endrec.

    fpin   -- seekable binary file object for the archive
    offset -- position of the regular "end of central directory" record,
              given relative to the end of the file (used with whence=2)
    endrec -- list built by _EndRecData(); updated in place

    Returns endrec.  It is returned unchanged when the ZIP64 locator or
    record is absent, truncated, or carries the wrong signature.  Raises
    BadZipfile for archives that span multiple disks.
    """
    locator_pos = offset - sizeEndCentDir64Locator
    try:
        fpin.seek(locator_pos, 2)
    except IOError:
        # Not enough room in front of the regular record for a locator.
        return endrec
    data = fpin.read(sizeEndCentDir64Locator)
    if len(data) != sizeEndCentDir64Locator:
        # Short read: struct.unpack() would fail on a truncated locator.
        return endrec
    sig, diskno, reloff, disks = struct.unpack(structEndCentDir64Locator, data)
    if sig != magicEndCentDir64Locator:
        return endrec

    if diskno != 0 or disks != 1:
        raise BadZipfile("zipfiles that span multiple disks are not supported")

    # Assume no 'zip64 extensible data'
    try:
        fpin.seek(locator_pos - sizeEndCentDir64, 2)
    except IOError:
        return endrec
    data = fpin.read(sizeEndCentDir64)
    if len(data) != sizeEndCentDir64:
        return endrec
    sig, sz, create_version, read_version, disk_num, disk_dir, \
            dircount, dircount2, dirsize, diroffset = \
            struct.unpack(structEndCentDir64, data)
    if sig != magicEndCentDir64:
        return endrec

    # Update the original endrec using data from the ZIP64 record
    endrec[_ECD_DISK_NUMBER] = disk_num
    endrec[_ECD_DISK_START] = disk_dir
    endrec[_ECD_ENTRIES_THIS_DISK] = dircount
    endrec[_ECD_ENTRIES_TOTAL] = dircount2
    endrec[_ECD_SIZE] = dirsize
    endrec[_ECD_OFFSET] = diroffset
    return endrec
203
204
def _EndRecData(fpin):
    """Return data from the "End of Central Directory" record, or None.

    The data is a list of the nine items in the ZIP "End of central dir"
    record followed by a tenth item, the file seek offset of this record.

    None is returned when no valid record can be found, including when the
    file is too short to hold one or the record found is truncated."""

    # Determine file size
    fpin.seek(0, 2)
    filesize = fpin.tell()

    # Check to see if this is ZIP file with no archive comment (the
    # "end of central directory" structure should be the last item in the
    # file if this is the case).
    try:
        fpin.seek(-sizeEndCentDir, 2)
    except IOError:
        # File is shorter than the record itself: not a ZIP file.
        return None
    data = fpin.read()
    if (len(data) == sizeEndCentDir and
            data[0:4] == magicEndCentDir and data[-2:] == b"\000\000"):
        # the signature is correct and there's no comment, unpack structure
        endrec = list(struct.unpack(structEndCentDir, data))

        # Append a blank comment and record start offset
        endrec.append(b"")
        endrec.append(filesize - sizeEndCentDir)
        if endrec[_ECD_OFFSET] == 0xffffffff:
            # the value for the "offset of the start of the central directory"
            # indicates that there is a "Zip64 end of central directory"
            # structure present, so go look for it
            return _EndRecData64(fpin, -sizeEndCentDir, endrec)

        return endrec

    # Either this is not a ZIP file, or it is a ZIP file with an archive
    # comment.  Search the end of the file for the "end of central directory"
    # record signature. The comment is the last item in the ZIP file and may be
    # up to 64K long.  It is assumed that the "end of central directory" magic
    # number does not appear in the comment.
    maxCommentStart = max(filesize - (1 << 16) - sizeEndCentDir, 0)
    fpin.seek(maxCommentStart, 0)
    data = fpin.read()
    start = data.rfind(magicEndCentDir)
    if start >= 0:
        # found the magic number; attempt to unpack and interpret
        recData = data[start:start+sizeEndCentDir]
        if len(recData) != sizeEndCentDir:
            # The record is cut off by the end of the file.
            return None
        endrec = list(struct.unpack(structEndCentDir, recData))
        comment = data[start+sizeEndCentDir:]
        # check that comment length is correct
        if endrec[_ECD_COMMENT_SIZE] == len(comment):
            # Append the archive comment and start offset
            endrec.append(comment)
            endrec.append(maxCommentStart + start)
            if endrec[_ECD_OFFSET] == 0xffffffff:
                # There is apparently a "Zip64 end of central directory"
                # structure present, so go look for it.  `start` is relative
                # to maxCommentStart, so convert it to an absolute position
                # before expressing it relative to the end of the file.
                return _EndRecData64(fpin, maxCommentStart + start - filesize,
                                     endrec)
            return endrec

    # Unable to find a valid end of central directory structure
    return None
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000263
Fred Drake484d7352000-10-02 21:14:52 +0000264
class ZipInfo (object):
    """Class with attributes describing each file in the ZIP archive."""

    __slots__ = (
            'orig_filename',
            'filename',
            'date_time',
            'compress_type',
            'comment',
            'extra',
            'create_system',
            'create_version',
            'extract_version',
            'reserved',
            'flag_bits',
            'volume',
            'internal_attr',
            'external_attr',
            'header_offset',
            'CRC',
            'compress_size',
            'file_size',
            '_raw_time',
        )

    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
        """Create an entry for *filename* last modified at *date_time*.

        filename  -- member name; it is cut at the first null character and
                     os.sep is replaced by "/"
        date_time -- (year, month, day, hour, min, sec) tuple
        """
        self.orig_filename = filename   # Original file name in archive

        # Terminate the file name at the first null byte.  Null bytes in file
        # names are used as tricks by viruses in archives.
        null_byte = filename.find(chr(0))
        if null_byte >= 0:
            filename = filename[0:null_byte]
        # This is used to ensure paths in generated ZIP files always use
        # forward slashes as the directory separator, as required by the
        # ZIP format specification.
        if os.sep != "/" and os.sep in filename:
            filename = filename.replace(os.sep, "/")

        self.filename = filename        # Normalized file name
        self.date_time = date_time      # year, month, day, hour, min, sec
        # Standard values:
        self.compress_type = ZIP_STORED # Type of compression for the file
        self.comment = b""              # Comment for each file
        self.extra = b""                # ZIP extra data
        if sys.platform == 'win32':
            self.create_system = 0          # System which created ZIP archive
        else:
            # Assume everything else is unix-y
            self.create_system = 3          # System which created ZIP archive
        self.create_version = 20        # Version which created ZIP archive
        self.extract_version = 20       # Version needed to extract archive
        self.reserved = 0               # Must be zero
        self.flag_bits = 0              # ZIP flag bits
        self.volume = 0                 # Volume number of file header
        self.internal_attr = 0          # Internal attributes
        self.external_attr = 0          # External file attributes
        # Other attributes are set by class ZipFile:
        # header_offset         Byte offset to the file header
        # CRC                   CRC-32 of the uncompressed file
        # compress_size         Size of the compressed file
        # file_size             Size of the uncompressed file

    def FileHeader(self):
        """Return the per-file header as bytes.

        The result is the packed local file header followed by the encoded
        file name and the extra field.  When either size exceeds
        ZIP64_LIMIT, a ZIP64 extra record is appended and extract_version
        and create_version are raised to at least 45 on this object.
        """
        dt = self.date_time
        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
        if self.flag_bits & 0x08:
            # Set these to zero because we write them after the file data
            CRC = compress_size = file_size = 0
        else:
            CRC = self.CRC
            compress_size = self.compress_size
            file_size = self.file_size

        extra = self.extra

        if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
            # File is larger than what fits into a 4 byte integer,
            # fall back to the ZIP64 extension
            fmt = '<HHQQ'
            extra = extra + struct.pack(fmt,
                    1, struct.calcsize(fmt)-4, file_size, compress_size)
            file_size = 0xffffffff
            compress_size = 0xffffffff
            self.extract_version = max(45, self.extract_version)
            # Each version field is bumped from its own previous value.
            self.create_version = max(45, self.create_version)

        filename, flag_bits = self._encodeFilenameFlags()
        header = struct.pack(structFileHeader, magicFileHeader,
                 self.extract_version, self.reserved, flag_bits,
                 self.compress_type, dostime, dosdate, CRC,
                 compress_size, file_size,
                 len(filename), len(extra))
        return header + filename + extra

    def _encodeFilenameFlags(self):
        """Return (encoded file name, flag bits) for writing a header.

        ASCII is used when the name allows it; otherwise the name is
        encoded as UTF-8 and bit 0x800 is set in the returned flags.
        """
        try:
            return self.filename.encode('ascii'), self.flag_bits
        except UnicodeEncodeError:
            return self.filename.encode('utf-8'), self.flag_bits | 0x800

    def _decodeExtra(self):
        """Decode the extra field, applying any ZIP64 (type 1) record.

        Values in the record replace file_size, compress_size and
        header_offset, in that order, for each attribute that currently
        holds its "see ZIP64 record" marker value.  Raises RuntimeError
        for a type 1 record with an unexpected length.
        """
        extra = self.extra
        unpack = struct.unpack
        # Every record starts with a 4-byte (type, length) header; a shorter
        # trailing fragment cannot be a record and is ignored.
        while len(extra) >= 4:
            tp, ln = unpack('<HH', extra[:4])
            if tp == 1:
                if ln >= 24:
                    counts = unpack('<QQQ', extra[4:28])
                elif ln == 16:
                    counts = unpack('<QQ', extra[4:20])
                elif ln == 8:
                    counts = unpack('<Q', extra[4:12])
                elif ln == 0:
                    counts = ()
                else:
                    raise RuntimeError("Corrupt extra field %s"%(ln,))

                idx = 0

                # ZIP64 extension (large files and/or large archives)
                if self.file_size in (0xffffffffffffffff, 0xffffffff):
                    self.file_size = counts[idx]
                    idx += 1

                if self.compress_size == 0xFFFFFFFF:
                    self.compress_size = counts[idx]
                    idx += 1

                if self.header_offset == 0xffffffff:
                    self.header_offset = counts[idx]
                    idx += 1

            extra = extra[ln+4:]
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000403
404
class _ZipDecrypter:
    """Decrypt data stored with the password-based ZIP encryption scheme.

    Known plaintext attacks exist against this scheme, but being able to
    read such files is still useful.

    Usage:
        zd = _ZipDecrypter(mypwd)
        plain_char = zd(cypher_char)
        plain_text = map(zd, cypher_text)
    """

    def _GenerateCRCTable():
        """Build the 256-entry CRC-32 lookup table.

        The key schedule only needs the one-byte CRC32 primitive, and a
        direct table lookup was found to be faster than going through
        binascii.crc32().
        """
        polynomial = 0xedb88320
        entries = []
        for value in range(256):
            for _ in range(8):
                low_bit = value & 1
                value = (value >> 1) & 0x7FFFFFFF
                if low_bit:
                    value ^= polynomial
            entries.append(value)
        return entries
    crctable = _GenerateCRCTable()

    def _crc32(self, ch, crc):
        """Compute the CRC32 primitive on one byte."""
        shifted = (crc >> 8) & 0xffffff
        index = (crc ^ ch) & 0xff
        return shifted ^ self.crctable[index]

    def __init__(self, pwd):
        # Fixed starting keys, then mix in every byte of the password.
        self.key0, self.key1, self.key2 = 0x12345678, 0x23456789, 0x34567890
        for byte in pwd:
            self._UpdateKeys(byte)

    def _UpdateKeys(self, c):
        """Advance the three keys using the plaintext byte c."""
        mask32 = 0xFFFFFFFF
        self.key0 = self._crc32(c, self.key0)
        summed = (self.key1 + (self.key0 & 0xFF)) & mask32
        self.key1 = (summed * 0x08088405 + 1) & mask32
        top_byte = (self.key1 >> 24) & 0xFF
        self.key2 = self._crc32(top_byte, self.key2)

    def __call__(self, c):
        """Decrypt a single character."""
        assert isinstance(c, int)
        seed = self.key2 | 2
        keystream = ((seed * (seed ^ 1)) >> 8) & 0xFF
        plain = c ^ keystream
        self._UpdateKeys(plain)
        return plain
462
class ZipExtFile:
    """File-like object for reading an archive member.
       Is returned by ZipFile.open().
    """

    def __init__(self, fileobj, zipinfo, decrypt=None):
        """Wrap *fileobj* for reading the member described by *zipinfo*.

        fileobj -- object whose read() supplies the member's stored bytes
        zipinfo -- provides compress_type, compress_size and filename
        decrypt -- optional callable applied to each byte read
        """
        self.fileobj = fileobj
        self.decrypter = decrypt
        self.bytes_read = 0
        self.rawbuffer = b''        # bytes read but not yet decompressed
        self.readbuffer = b''       # processed bytes not yet returned by read()
        self.linebuffer = b''       # bytes held back by readline()
        self.eof = False
        self.univ_newlines = False
        self.nlSeps = (b"\n", )
        self.lastdiscard = b''      # separator dropped by the last readline()

        self.compress_type = zipinfo.compress_type
        self.compress_size = zipinfo.compress_size

        self.closed = False
        self.mode = "r"
        self.name = zipinfo.filename

        # read from compressed files in 64k blocks
        self.compreadsize = 64*1024
        if self.compress_type == ZIP_DEFLATED:
            self.dc = zlib.decompressobj(-15)

    def set_univ_newlines(self, univ_newlines):
        """Enable or disable recognition of \\r\\n and \\r as line ends."""
        self.univ_newlines = univ_newlines

        # pick line separator char(s) based on universal newlines flag
        self.nlSeps = (b"\n", )
        if self.univ_newlines:
            self.nlSeps = (b"\r\n", b"\r", b"\n")

    def __iter__(self):
        return self

    def __next__(self):
        nextline = self.readline()
        if not nextline:
            raise StopIteration()

        return nextline

    def close(self):
        """Mark the object as closed (only sets the `closed` flag)."""
        self.closed = True

    def _checkfornewline(self):
        """Return (index, length) of the first separator in linebuffer.

        Separators are tried in the order given by nlSeps; (-1, -1) is
        returned when none is present.
        """
        nl, nllen = -1, -1
        if self.linebuffer:
            # ugly check for cases where half of an \r\n pair was
            # read on the last pass, and the \r was discarded.  In this
            # case we just throw away the \n at the start of the buffer.
            if (self.lastdiscard, self.linebuffer[:1]) == (b'\r', b'\n'):
                self.linebuffer = self.linebuffer[1:]

            for sep in self.nlSeps:
                nl = self.linebuffer.find(sep)
                if nl >= 0:
                    nllen = len(sep)
                    return nl, nllen

        return nl, nllen

    def readline(self, size = -1):
        """Read a line with approx. size. If size is negative,
           read a whole line.
        """
        if size < 0:
            size = sys.maxsize
        elif size == 0:
            return b''

        # check for a newline already in buffer
        nl, nllen = self._checkfornewline()

        if nl >= 0:
            # the next line was already in the buffer
            nl = min(nl, size)
        else:
            # no line break in buffer - try to read more
            size -= len(self.linebuffer)
            while nl < 0 and size > 0:
                buf = self.read(min(size, 100))
                if not buf:
                    break
                self.linebuffer += buf
                size -= len(buf)

                # check for a newline in buffer
                nl, nllen = self._checkfornewline()

            # we either ran out of bytes in the file, or
            # met the specified size limit without finding a newline,
            # so return current buffer
            if nl < 0:
                s = self.linebuffer
                self.linebuffer = b''
                return s

        buf = self.linebuffer[:nl]
        self.lastdiscard = self.linebuffer[nl:nl + nllen]
        self.linebuffer = self.linebuffer[nl + nllen:]

        # line is always returned with \n as newline char (except possibly
        # for a final incomplete line in the file, which is handled above).
        return buf + b"\n"

    def readlines(self, sizehint = -1):
        """Return a list with all (following) lines. The sizehint parameter
        is ignored in this implementation.
        """
        result = []
        while True:
            line = self.readline()
            if not line: break
            result.append(line)
        return result

    def read(self, size = None):
        """Read and return bytes of the member's contents.

        With size None or negative, everything that is available is
        returned; with size 0, b'' is returned; otherwise at most *size*
        bytes are returned.
        """
        # act like file obj and return empty string if size is 0
        if size == 0:
            return b''

        # A negative size means "no limit".  Without this, the final slice
        # readbuffer[:size] would silently drop bytes from the end.
        if size is not None and size < 0:
            size = None

        # determine read size
        bytesToRead = self.compress_size - self.bytes_read

        # adjust read size for encrypted files since the first 12 bytes
        # are for the encryption/password information
        if self.decrypter is not None:
            bytesToRead -= 12

        if size is not None:
            if self.compress_type == ZIP_STORED:
                lr = len(self.readbuffer)
                bytesToRead = min(bytesToRead, size - lr)
            elif self.compress_type == ZIP_DEFLATED:
                if len(self.readbuffer) > size:
                    # the user has requested fewer bytes than we've already
                    # pulled through the decompressor; don't read any more
                    bytesToRead = 0
                else:
                    # user will use up the buffer, so read some more
                    lr = len(self.rawbuffer)
                    bytesToRead = min(bytesToRead, self.compreadsize - lr)

        # avoid reading past end of file contents
        if bytesToRead + self.bytes_read > self.compress_size:
            bytesToRead = self.compress_size - self.bytes_read

        # try to read from file (if necessary)
        if bytesToRead > 0:
            data = self.fileobj.read(bytesToRead)
            self.bytes_read += len(data)
            self.rawbuffer += data

            # handle contents of raw buffer
            if self.rawbuffer:
                newdata = self.rawbuffer
                self.rawbuffer = b''

                # decrypt new data if we were given an object to handle that
                if newdata and self.decrypter is not None:
                    newdata = bytes(map(self.decrypter, newdata))

                # decompress newly read data if necessary
                if newdata and self.compress_type == ZIP_DEFLATED:
                    newdata = self.dc.decompress(newdata)
                    self.rawbuffer = self.dc.unconsumed_tail
                    if self.eof and len(self.rawbuffer) == 0:
                        # we're out of raw bytes (both from the file and
                        # the local buffer); flush just to make sure the
                        # decompressor is done
                        newdata += self.dc.flush()
                        # prevent decompressor from being used again
                        self.dc = None

                self.readbuffer += newdata

        # return what the user asked for
        if size is None or len(self.readbuffer) <= size:
            data = self.readbuffer
            self.readbuffer = b''
        else:
            data = self.readbuffer[:size]
            self.readbuffer = self.readbuffer[size:]

        return data
Guido van Rossumd8faa362007-04-27 19:54:29 +0000660
661
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000662class ZipFile:
Tim Petersa19a1682001-03-29 04:36:09 +0000663 """ Class with methods to open, read, write, close, list zip files.
664
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000665 z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=False)
Tim Petersa19a1682001-03-29 04:36:09 +0000666
Fred Drake3d9091e2001-03-26 15:49:24 +0000667 file: Either the path to the file, or a file-like object.
668 If it is a path, the file will be opened and closed by ZipFile.
669 mode: The mode can be either read "r", write "w" or append "a".
670 compression: ZIP_STORED (no compression) or ZIP_DEFLATED (requires zlib).
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000671 allowZip64: if True ZipFile will create files with ZIP64 extensions when
672 needed, otherwise it will raise an exception when this would
673 be necessary.
674
Fred Drake3d9091e2001-03-26 15:49:24 +0000675 """
Fred Drake484d7352000-10-02 21:14:52 +0000676
Fred Drake90eac282001-02-28 05:29:34 +0000677 fp = None # Set here since __del__ checks it
678
def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=False):
    """Open the ZIP file with mode read "r", write "w" or append "a".

    file        -- path of the archive (a str), or a file-like object
    mode        -- "r", "w" or "a"; anything else raises RuntimeError
    compression -- ZIP_STORED or ZIP_DEFLATED (the latter requires zlib);
                   other values raise RuntimeError
    allowZip64  -- stored as self._allowZip64

    When *file* is a path and mode is "a", a failure to open the file for
    updating falls back to creating it as in mode "w".
    """
    if mode not in ("r", "w", "a"):
        raise RuntimeError('ZipFile() requires mode "r", "w", or "a"')

    if compression == ZIP_STORED:
        pass
    elif compression == ZIP_DEFLATED:
        if not zlib:
            raise RuntimeError(
                  "Compression requires the (missing) zlib module")
    else:
        raise RuntimeError("That compression method is not supported")

    self._allowZip64 = allowZip64
    self._didModify = False
    self.debug = 0  # Level of printing: 0 through 3
    self.NameToInfo = {}    # Find file info given name
    self.filelist = []      # List of ZipInfo instances for archive
    self.compression = compression  # Method of compression
    self.mode = key = mode.replace('b', '')[0]
    self.pwd = None
    self.comment = b''

    # Check if we were passed a file-like object
    if isinstance(file, str):
        # No, it's a filename
        self._filePassed = 0
        self.filename = file
        modeDict = {'r' : 'rb', 'w': 'wb', 'a' : 'r+b'}
        try:
            self.fp = io.open(file, modeDict[mode])
        except IOError:
            if mode == 'a':
                # Could not open for update: create the file instead.
                mode = key = 'w'
                self.fp = io.open(file, modeDict[mode])
            else:
                raise
    else:
        self._filePassed = 1
        self.fp = file
        self.filename = getattr(file, 'name', None)

    # mode was validated on entry, so key is always 'r', 'w' or 'a';
    # 'w' needs no further work here.
    if key == 'r':
        self._GetContents()
    elif key == 'a':
        try:                        # See if file is a zip file
            self._RealGetContents()
            # seek to start of directory and overwrite
            # (start_dir is presumably set by _RealGetContents -- confirm)
            self.fp.seek(self.start_dir, 0)
        except BadZipfile:          # file is not a zip file, just append
            self.fp.seek(0, 2)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000738
def _GetContents(self):
    """Read the directory through _RealGetContents().

    When the format is bad (BadZipfile), a file that this object opened
    itself is closed and forgotten before the error is re-raised.
    """
    try:
        self._RealGetContents()
    except BadZipfile:
        opened_here = not self._filePassed
        if opened_here:
            self.fp.close()
            self.fp = None
        raise
749
def _RealGetContents(self):
    """Read in the table of contents for the ZIP file.

    Fills self.filelist and self.NameToInfo with one ZipInfo per central
    directory record and sets self.comment and self.start_dir.

    Raises BadZipfile when no end-of-central-directory record is found,
    when a central directory record is truncated, or when a record does
    not start with the expected magic number.
    """
    fp = self.fp
    endrec = _EndRecData(fp)
    if not endrec:
        raise BadZipfile("File is not a zip file")
    if self.debug > 1:
        print(endrec)
    size_cd = endrec[_ECD_SIZE]             # bytes in central directory
    offset_cd = endrec[_ECD_OFFSET]         # offset of central directory
    self.comment = endrec[_ECD_COMMENT]     # archive comment

    # "concat" is zero, unless zip was concatenated to another file
    concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
    if endrec[_ECD_LOCATION] > ZIP64_LIMIT:
        # If the offset of the "End of Central Dir" record requires Zip64
        # extension structures, account for them
        concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)

    if self.debug > 2:
        inferred = concat + offset_cd
        print("given, inferred, offset", offset_cd, inferred, concat)
    # self.start_dir:  Position of start of central directory
    self.start_dir = offset_cd + concat
    fp.seek(self.start_dir, 0)
    data = fp.read(size_cd)
    # Parse the directory from memory; `fp` now refers to the buffer.
    fp = io.BytesIO(data)
    total = 0
    while total < size_cd:
        centdir = fp.read(sizeCentralDir)
        # A short read would make struct.unpack() fail with struct.error;
        # report it as a malformed archive instead.
        if len(centdir) != sizeCentralDir:
            raise BadZipfile("Truncated central directory")
        if centdir[0:4] != magicCentralDir:
            raise BadZipfile("Bad magic number for central directory")
        centdir = struct.unpack(structCentralDir, centdir)
        if self.debug > 2:
            print(centdir)
        filename = fp.read(centdir[_CD_FILENAME_LENGTH])
        flags = centdir[5]      # same field that becomes x.flag_bits below
        if flags & 0x800:
            # UTF-8 file names extension
            filename = filename.decode('utf-8')
        else:
            # Historical ZIP filename encoding
            filename = filename.decode('cp437')
        # Create ZipInfo instance to store file information
        x = ZipInfo(filename)
        x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
        x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
        x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
        (x.create_version, x.create_system, x.extract_version, x.reserved,
            x.flag_bits, x.compress_type, t, d,
            x.CRC, x.compress_size, x.file_size) = centdir[1:12]
        x.volume, x.internal_attr, x.external_attr = centdir[15:18]
        # Convert date/time code to (year, month, day, hour, min, sec)
        x._raw_time = t
        x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
                                 t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )

        x._decodeExtra()
        x.header_offset = x.header_offset + concat
        self.filelist.append(x)
        self.NameToInfo[x.filename] = x

        # update total bytes read from central directory
        total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH]
                 + centdir[_CD_EXTRA_FIELD_LENGTH]
                 + centdir[_CD_COMMENT_LENGTH])

        if self.debug > 2:
            print("total", total)
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000819
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000820
def namelist(self):
    """Return a new list with the file name of every archive member."""
    return [member.filename for member in self.filelist]
827
def infolist(self):
    """Return the list of ZipInfo instances for files in the archive.

    The list object held by this instance is returned, not a copy.
    """
    members = self.filelist
    return members
832
def printdir(self, file=None):
    """Print a table of contents for the zip file.

    One header line is followed by one line per member giving its name,
    modification time and uncompressed size.  `file` is handed to
    print() as the output stream.
    """
    header = "%-46s %19s %12s" % ("File Name", "Modified ", "Size")
    print(header, file=file)
    for entry in self.filelist:
        stamp = "%d-%02d-%02d %02d:%02d:%02d" % entry.date_time[:6]
        row = "%-46s %s %12d" % (entry.filename, stamp, entry.file_size)
        print(row, file=file)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000841
842 def testzip(self):
Fred Drake484d7352000-10-02 21:14:52 +0000843 """Read all the files and check the CRC."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000844 for zinfo in self.filelist:
845 try:
Tim Peterse1190062001-01-15 03:34:38 +0000846 self.read(zinfo.filename) # Check CRC-32
Raymond Hettingerc0fac962003-06-27 22:25:03 +0000847 except BadZipfile:
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000848 return zinfo.filename
849
def getinfo(self, name):
    """Return the instance of ZipInfo given 'name'.

    Raises KeyError when the archive has no member with that name.
    """
    found = self.NameToInfo.get(name)
    if found is not None:
        return found

    raise KeyError(
        'There is no item named %r in the archive' % name)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000858
def setpassword(self, pwd):
    """Set default password for encrypted files.

    `pwd` must be a bytes object, or None to clear the default password.
    Raises TypeError for any other type.  An explicit check is used
    instead of `assert` so that validation still happens when Python
    runs with -O.
    """
    if pwd is not None and not isinstance(pwd, bytes):
        raise TypeError("pwd: expected bytes, got %s" % type(pwd).__name__)
    self.pwd = pwd
863
def read(self, name, pwd=None):
    """Return the complete contents of member `name`.

    The member is opened through self.open() in mode "r" with the given
    password and read in one call.
    """
    member_file = self.open(name, "r", pwd)
    return member_file.read()
867
def open(self, name, mode="r", pwd=None):
    """Return file-like object for 'name'.

    `name` is a member name or a ZipInfo instance.  `mode` must be "r",
    "U" or "rU"; with "U" universal newlines are enabled on the result.
    `pwd` is the password for an encrypted member; when it is not given
    the default set on this object is used.

    Raises RuntimeError for a bad mode, a closed archive, a missing or
    wrong password; BadZipfile when the local file header is malformed
    or its name disagrees with the central directory; KeyError (from
    getinfo) for an unknown member name.
    """
    if mode not in ("r", "U", "rU"):
        raise RuntimeError('open() requires mode "r", "U", or "rU"')
    if not self.fp:
        raise RuntimeError(
            "Attempt to read ZIP archive that was already closed")

    # Only open a new file for instances where we were not
    # given a file object in the constructor
    if self._filePassed:
        zef_file = self.fp
    else:
        zef_file = io.open(self.filename, 'rb')

    try:
        # Make sure we have an info object
        if isinstance(name, ZipInfo):
            # 'name' is already an info object
            zinfo = name
        else:
            # Get info object for name
            zinfo = self.getinfo(name)

        zef_file.seek(zinfo.header_offset, 0)

        # Skip the file header:
        fheader = zef_file.read(sizeFileHeader)
        if fheader[0:4] != magicFileHeader:
            raise BadZipfile("Bad magic number for file header")

        fheader = struct.unpack(structFileHeader, fheader)
        fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
        if fheader[_FH_EXTRA_FIELD_LENGTH]:
            zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])

        # Decode the header name with the same rule used for the central
        # directory (flag bit 0x800 means UTF-8, otherwise cp437), so that
        # non-ASCII cp437 names compare equal.
        if zinfo.flag_bits & 0x800:
            fname_str = fname.decode("utf-8")
        else:
            fname_str = fname.decode("cp437")

        if fname_str != zinfo.orig_filename:
            raise BadZipfile(
                'File name in directory %r and header %r differ.'
                % (zinfo.orig_filename, fname))

        # check for encrypted flag & handle password
        is_encrypted = zinfo.flag_bits & 0x1
        zd = None
        if is_encrypted:
            if not pwd:
                pwd = self.pwd
            if not pwd:
                raise RuntimeError("File %s is encrypted, "
                                   "password required for extraction" % name)

            zd = _ZipDecrypter(pwd)
            # The first 12 bytes in the cypher stream is an encryption header
            #  used to strengthen the algorithm. The first 11 bytes are
            #  completely random, while the 12th contains the MSB of the CRC,
            #  or the MSB of the file time depending on the header type
            #  and is used to check the correctness of the password.
            header = zef_file.read(12)
            h = list(map(zd, header[0:12]))
            if zinfo.flag_bits & 0x8:
                # compare against the file type from extended local headers
                check_byte = (zinfo._raw_time >> 8) & 0xff
            else:
                # compare against the CRC otherwise
                check_byte = (zinfo.CRC >> 24) & 0xff
            if h[11] != check_byte:
                raise RuntimeError("Bad password for file", name)

        # build and return a ZipExtFile
        if zd is None:
            zef = ZipExtFile(zef_file, zinfo)
        else:
            zef = ZipExtFile(zef_file, zinfo, zd)

        # set universal newlines on ZipExtFile if necessary
        if "U" in mode:
            zef.set_univ_newlines(True)
        return zef
    except BaseException:
        # Do not leak the handle we opened ourselves when setup fails.
        if not self._filePassed:
            zef_file.close()
        raise
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000945
def extract(self, member, path=None, pwd=None):
    """Extract a member from the archive to the current working directory,
       using its full name. `member' may be a filename or a ZipInfo
       object. You can specify a different directory using `path'.
       Returns whatever _extract_member() returns for the member.
    """
    zinfo = member if isinstance(member, ZipInfo) else self.getinfo(member)
    destination = os.getcwd() if path is None else path
    return self._extract_member(zinfo, destination, pwd)
959
def extractall(self, path=None, members=None, pwd=None):
    """Extract all members from the archive to the current working
       directory. `path' specifies a different directory to extract to.
       `members' is optional; when omitted, every name returned by
       namelist() is extracted. Each entry is passed to extract().
    """
    selected = self.namelist() if members is None else members
    for entry in selected:
        self.extract(entry, path, pwd)
971
def _extract_member(self, member, targetpath, pwd):
    """Extract the ZipInfo object 'member' to a physical
       file on the path targetpath and return the path written.

       The member name comes from the archive and is not trusted: a
       drive prefix, leading separators and any "." or ".." components
       are dropped so the result always lies below targetpath.  A member
       whose name ends with "/" is created as a directory.
    """
    # build the destination pathname, replacing
    # forward slashes to platform specific separators.
    arcname = member.filename.replace("/", os.path.sep)
    if os.path.altsep:
        arcname = arcname.replace(os.path.altsep, os.path.sep)
    # Strip anything that could make the name absolute or climb upwards.
    arcname = os.path.splitdrive(arcname)[1]
    ignored = ("", os.path.curdir, os.path.pardir)
    arcname = os.path.sep.join(
        [part for part in arcname.split(os.path.sep) if part not in ignored])

    targetpath = os.path.normpath(os.path.join(targetpath, arcname))

    # Create all upper directories if necessary.
    upperdirs = os.path.dirname(targetpath)
    if upperdirs and not os.path.exists(upperdirs):
        os.makedirs(upperdirs)

    if member.filename[-1:] == "/":
        # Directory entry: there is no data to copy.
        if not os.path.isdir(targetpath):
            os.mkdir(targetpath)
        return targetpath

    source = self.open(member, pwd=pwd)
    try:
        target = io.open(targetpath, "wb")
        try:
            shutil.copyfileobj(source, target)
        finally:
            target.close()
    finally:
        source.close()

    return targetpath
1001
def _writecheck(self, zinfo):
    """Check for errors before writing a file to the archive.

    Raises RuntimeError when the archive is not writable, is closed, or
    the compression method is unusable, and LargeZipFile when the file
    size or header offset exceeds ZIP64_LIMIT while ZIP64 is disabled.
    A duplicate name only produces a warning when debugging is on.
    """
    if zinfo.filename in self.NameToInfo and self.debug:
        # Warning for duplicate names
        print("Duplicate name:", zinfo.filename)

    if self.mode not in ("w", "a"):
        raise RuntimeError('write() requires mode "w" or "a"')
    if not self.fp:
        raise RuntimeError(
            "Attempt to write ZIP archive that was already closed")

    method = zinfo.compress_type
    if method == ZIP_DEFLATED and not zlib:
        raise RuntimeError(
            "Compression requires the (missing) zlib module")
    if method not in (ZIP_STORED, ZIP_DEFLATED):
        raise RuntimeError("That compression method is not supported")

    if zinfo.file_size > ZIP64_LIMIT and not self._allowZip64:
        raise LargeZipFile("Filesize would require ZIP64 extensions")
    if zinfo.header_offset > ZIP64_LIMIT and not self._allowZip64:
        raise LargeZipFile(
            "Zipfile size would require ZIP64 extensions")
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001024
def write(self, filename, arcname=None, compress_type=None):
    """Put the bytes from filename into the archive under the name
    arcname.

    `arcname` defaults to `filename`; its drive prefix and leading path
    separators are removed.  `compress_type` defaults to the compression
    chosen for the archive.  The local header is first written with zero
    CRC and sizes and patched once the data has been copied.

    Raises RuntimeError when the archive is closed, plus whatever
    _writecheck() raises.
    """
    if not self.fp:
        raise RuntimeError(
            "Attempt to write to ZIP archive that was already closed")

    st = os.stat(filename)
    mtime = time.localtime(st.st_mtime)
    date_time = mtime[0:6]
    # Create ZipInfo instance to store file information
    if arcname is None:
        arcname = filename
    arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
    # The emptiness test keeps arcname[0] from raising IndexError when
    # the name consists of separators only.
    while arcname and arcname[0] in (os.sep, os.altsep):
        arcname = arcname[1:]
    zinfo = ZipInfo(arcname, date_time)
    zinfo.external_attr = (st.st_mode & 0xFFFF) << 16      # Unix attributes
    if compress_type is None:
        zinfo.compress_type = self.compression
    else:
        zinfo.compress_type = compress_type

    zinfo.file_size = st.st_size
    zinfo.flag_bits = 0x00
    zinfo.header_offset = self.fp.tell()    # Start of header bytes

    self._writecheck(zinfo)
    self._didModify = True
    # The with-block closes the source file even when copying fails.
    with io.open(filename, "rb") as fp:
        # Must overwrite CRC and sizes with correct data later
        zinfo.CRC = CRC = 0
        zinfo.compress_size = compress_size = 0
        zinfo.file_size = file_size = 0
        self.fp.write(zinfo.FileHeader())
        if zinfo.compress_type == ZIP_DEFLATED:
            cmpr = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                 zlib.DEFLATED, -15)
        else:
            cmpr = None
        while True:
            buf = fp.read(1024 * 8)
            if not buf:
                break
            file_size = file_size + len(buf)
            CRC = crc32(buf, CRC) & 0xffffffff
            if cmpr:
                buf = cmpr.compress(buf)
                compress_size = compress_size + len(buf)
            self.fp.write(buf)
    if cmpr:
        buf = cmpr.flush()
        compress_size = compress_size + len(buf)
        self.fp.write(buf)
        zinfo.compress_size = compress_size
    else:
        zinfo.compress_size = file_size
    zinfo.CRC = CRC
    zinfo.file_size = file_size
    # Seek backwards and write CRC and file sizes
    position = self.fp.tell()       # Preserve current position in file
    self.fp.seek(zinfo.header_offset + 14, 0)
    self.fp.write(struct.pack("<LLL", zinfo.CRC, zinfo.compress_size,
          zinfo.file_size))
    self.fp.seek(position, 0)
    self.filelist.append(zinfo)
    self.NameToInfo[zinfo.filename] = zinfo
1093
def writestr(self, zinfo_or_arcname, data):
    """Write a file into the archive.  The contents is 'data', which
    may be either a 'str' or a 'bytes' instance; if it is a 'str',
    it is encoded as UTF-8 first.
    'zinfo_or_arcname' is either a ZipInfo instance or
    the name of the file in the archive.

    Raises RuntimeError when the archive is closed, plus whatever
    _writecheck() raises.
    """
    if isinstance(data, str):
        data = data.encode("utf-8")
    if not isinstance(zinfo_or_arcname, ZipInfo):
        zinfo = ZipInfo(filename=zinfo_or_arcname,
                        date_time=time.localtime(time.time())[:6])
        zinfo.compress_type = self.compression
    else:
        zinfo = zinfo_or_arcname

    if not self.fp:
        raise RuntimeError(
            "Attempt to write to ZIP archive that was already closed")

    zinfo.file_size = len(data)            # Uncompressed size
    zinfo.header_offset = self.fp.tell()   # Start of header data
    self._writecheck(zinfo)
    self._didModify = True
    zinfo.CRC = crc32(data) & 0xffffffff   # CRC-32 checksum
    if zinfo.compress_type == ZIP_DEFLATED:
        co = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
             zlib.DEFLATED, -15)
        data = co.compress(data) + co.flush()
        zinfo.compress_size = len(data)    # Compressed size
    else:
        zinfo.compress_size = zinfo.file_size
    self.fp.write(zinfo.FileHeader())
    self.fp.write(data)
    self.fp.flush()
    if zinfo.flag_bits & 0x08:
        # Write CRC and file sizes after the file data.  The CRC was
        # masked to an unsigned 32-bit value above, so it has to be
        # packed unsigned ("L"); a signed "l" rejects values >= 2**31.
        self.fp.write(struct.pack("<LLL", zinfo.CRC, zinfo.compress_size,
              zinfo.file_size))
    self.filelist.append(zinfo)
    self.NameToInfo[zinfo.filename] = zinfo
1135
def __del__(self):
    """Finalizer: call close() in case the user forgot to."""
    self.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001139
def close(self):
    """Close the file, and for mode "w" and "a" write the ending
    records.

    When the archive was modified, the central directory, the ZIP64
    end records (only if the directory starts beyond ZIP64_LIMIT) and
    the end-of-central-directory record with the archive comment are
    written first.  The underlying file is closed only if this object
    opened it, and self.fp is reset even when writing fails.  Calling
    close() on an already closed archive does nothing.
    """
    if self.fp is None:
        return

    try:
        if self.mode in ("w", "a") and self._didModify: # write ending records
            count = 0
            pos1 = self.fp.tell()
            for zinfo in self.filelist:         # write central directory
                count = count + 1
                dt = zinfo.date_time
                dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
                dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
                extra = []
                if zinfo.file_size > ZIP64_LIMIT \
                        or zinfo.compress_size > ZIP64_LIMIT:
                    extra.append(zinfo.file_size)
                    extra.append(zinfo.compress_size)
                    file_size = 0xffffffff
                    compress_size = 0xffffffff
                else:
                    file_size = zinfo.file_size
                    compress_size = zinfo.compress_size

                if zinfo.header_offset > ZIP64_LIMIT:
                    extra.append(zinfo.header_offset)
                    header_offset = 0xffffffff
                else:
                    header_offset = zinfo.header_offset

                extra_data = zinfo.extra
                if extra:
                    # Append a ZIP64 field to the extra's
                    extra_data = struct.pack(
                            '<HH' + 'Q'*len(extra),
                            1, 8*len(extra), *extra) + extra_data

                    extract_version = max(45, zinfo.extract_version)
                    create_version = max(45, zinfo.create_version)
                else:
                    extract_version = zinfo.extract_version
                    create_version = zinfo.create_version

                try:
                    filename, flag_bits = zinfo._encodeFilenameFlags()
                    centdir = struct.pack(structCentralDir,
                     magicCentralDir, create_version,
                     zinfo.create_system, extract_version, zinfo.reserved,
                     flag_bits, zinfo.compress_type, dostime, dosdate,
                     zinfo.CRC, compress_size, file_size,
                     len(filename), len(extra_data), len(zinfo.comment),
                     0, zinfo.internal_attr, zinfo.external_attr,
                     header_offset)
                except DeprecationWarning:
                    # Dump the values that were being packed, then let
                    # the warning propagate.
                    print((structCentralDir,
                     magicCentralDir, create_version,
                     zinfo.create_system, extract_version, zinfo.reserved,
                     zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
                     zinfo.CRC, compress_size, file_size,
                     len(zinfo.filename), len(extra_data), len(zinfo.comment),
                     0, zinfo.internal_attr, zinfo.external_attr,
                     header_offset), file=sys.stderr)
                    raise
                self.fp.write(centdir)
                self.fp.write(filename)
                self.fp.write(extra_data)
                self.fp.write(zinfo.comment)

            pos2 = self.fp.tell()
            # Write end-of-zip-archive record
            centDirOffset = pos1
            if pos1 > ZIP64_LIMIT:
                # Need to write the ZIP64 end-of-archive records
                zip64endrec = struct.pack(
                        structEndCentDir64, magicEndCentDir64,
                        44, 45, 45, 0, 0, count, count, pos2 - pos1, pos1)
                self.fp.write(zip64endrec)

                zip64locrec = struct.pack(
                        structEndCentDir64Locator,
                        magicEndCentDir64Locator, 0, pos2, 1)
                self.fp.write(zip64locrec)
                centDirOffset = 0xFFFFFFFF

            # check for valid comment length
            if len(self.comment) > ZIP_MAX_COMMENT:
                if self.debug > 0:
                    msg = 'Archive comment is too long; truncating to %d bytes' \
                          % ZIP_MAX_COMMENT
                    print(msg)
                self.comment = self.comment[:ZIP_MAX_COMMENT]

            endrec = struct.pack(structEndCentDir, magicEndCentDir,
                                 0, 0, count % ZIP_FILECOUNT_LIMIT,
                                 count % ZIP_FILECOUNT_LIMIT, pos2 - pos1,
                                 centDirOffset, len(self.comment))
            self.fp.write(endrec)
            self.fp.write(self.comment)
            self.fp.flush()
    finally:
        # Always release the file, so a failed write is not retried
        # against the same handle by a later close() or __del__().
        fp = self.fp
        self.fp = None
        if not self._filePassed:
            fp.close()
1243
1244
class PyZipFile(ZipFile):
    """Class to create ZIP archives with Python library files and packages."""

    def writepy(self, pathname, basename = ""):
        """Add all files from "pathname" to the ZIP archive.

        If pathname is a package directory, search the directory and
        all package subdirectories recursively for all *.py and enter
        the modules into the archive.  If pathname is a plain
        directory, listdir *.py and enter all modules.  Else, pathname
        must be a Python *.py file and the module will be put into the
        archive.  Added modules are always module.pyo or module.pyc.
        This method will compile the module.py into module.pyc if
        necessary.

        `basename` is the archive directory prefix for the added
        modules.  Raises RuntimeError when pathname is a file that does
        not end with ".py".
        """
        if not os.path.isdir(pathname):
            if pathname[-3:] != ".py":
                raise RuntimeError(
                      'Files added with writepy() must end with ".py"')
            self._write_module(pathname[0:-3], basename, "Adding file")
            return

        initname = os.path.join(pathname, "__init__.py")
        if not os.path.isfile(initname):
            # This is NOT a package directory, add its files at top level
            if self.debug:
                print("Adding files from directory", pathname)
            for filename in os.listdir(pathname):
                if os.path.splitext(filename)[1] == ".py":
                    path = os.path.join(pathname, filename)
                    self._write_module(path[0:-3], basename, "Adding")
            return

        # This is a package directory, add it
        name = os.path.split(pathname)[1]
        if basename:
            basename = "%s/%s" % (basename, name)
        else:
            basename = name
        if self.debug:
            print("Adding package in", pathname, "as", basename)
        self._write_module(initname[0:-3], basename, "Adding")
        dirlist = os.listdir(pathname)
        dirlist.remove("__init__.py")
        # Add all *.py files and package subdirectories
        for filename in dirlist:
            path = os.path.join(pathname, filename)
            if os.path.isdir(path):
                if os.path.isfile(os.path.join(path, "__init__.py")):
                    # This is a package directory, add it
                    self.writepy(path, basename)  # Recursive call
            elif os.path.splitext(filename)[1] == ".py":
                self._write_module(path[0:-3], basename, "Adding")

    def _write_module(self, modulepath, basename, label):
        """Add the compiled file for modulepath (a path without ".py").

        `label` is the text printed before the archive name when
        debugging is enabled.
        """
        fname, arcname = self._get_codename(modulepath, basename)
        if self.debug:
            print(label, arcname)
        self.write(fname, arcname)

    def _get_codename(self, pathname, basename):
        """Return (filename, archivename) for the path.

        Given a module name path, return the correct file path and
        archive name, compiling if necessary.  For example, given
        /python/lib/string, return (/python/lib/string.pyc, string.pyc).
        The archive name is prefixed with "basename/" when basename is
        not empty.
        """
        file_py  = pathname + ".py"
        file_pyc = pathname + ".pyc"
        file_pyo = pathname + ".pyo"
        if os.path.isfile(file_pyo) and \
                os.stat(file_pyo).st_mtime >= os.stat(file_py).st_mtime:
            fname = file_pyo    # Use .pyo file
        else:
            if not os.path.isfile(file_pyc) or \
                    os.stat(file_pyc).st_mtime < os.stat(file_py).st_mtime:
                # The .pyc file is missing or older than the source.
                import py_compile
                if self.debug:
                    print("Compiling", file_py)
                try:
                    py_compile.compile(file_py, file_pyc, None, True)
                except py_compile.PyCompileError as err:
                    print(err.msg)
            fname = file_pyc
        archivename = os.path.split(fname)[1]
        if basename:
            archivename = "%s/%s" % (basename, archivename)
        return (fname, archivename)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001342
1343
def main(args = None):
    """Command-line driver: list, test, extract or create a zipfile.

    args is the list of command-line arguments without the program
    name; when it is None, sys.argv[1:] is used.  The first argument
    selects the action:

        -l zipfile.zip           print a listing of the archive
        -t zipfile.zip           test the archive and report a bad member
        -e zipfile.zip target    extract the archive into directory target
        -c zipfile.zip src ...   create the archive from files/directories

    An unknown action or a wrong number of arguments prints the usage
    text and exits with status 1 (SystemExit).

    The archive, and every extracted file, is closed even when an
    operation raises.  During extraction, directory entries are created
    as directories, and members whose names would resolve outside the
    target directory are skipped with a message on stderr.
    """
    import textwrap
    USAGE=textwrap.dedent("""\
        Usage:
            zipfile.py -l zipfile.zip        # Show listing of a zipfile
            zipfile.py -t zipfile.zip        # Test if a zipfile is valid
            zipfile.py -e zipfile.zip target # Extract zipfile into target dir
            zipfile.py -c zipfile.zip src ... # Create zipfile from sources
        """)
    if args is None:
        args = sys.argv[1:]

    if not args or args[0] not in ('-l', '-c', '-e', '-t'):
        print(USAGE)
        sys.exit(1)

    # NOTE: try/finally is used for the archive instead of a with
    # statement because context-manager support on ZipFile is not
    # visible from this part of the file.

    if args[0] == '-l':
        if len(args) != 2:
            print(USAGE)
            sys.exit(1)
        zf = ZipFile(args[1], 'r')
        try:
            zf.printdir()
        finally:
            zf.close()

    elif args[0] == '-t':
        if len(args) != 2:
            print(USAGE)
            sys.exit(1)
        zf = ZipFile(args[1], 'r')
        try:
            # NOTE(review): testzip() is defined outside this chunk; it is
            # expected to return the name of the first bad member, or
            # None when every member checks out -- confirm.
            badfile = zf.testzip()
        finally:
            zf.close()
        if badfile:
            print("The following enclosed file is corrupted: %r" % (badfile,))
        print("Done testing")

    elif args[0] == '-e':
        if len(args) != 3:
            print(USAGE)
            sys.exit(1)

        zf = ZipFile(args[1], 'r')
        try:
            out = args[2]
            out_root = os.path.abspath(out)
            # Root with a trailing separator, so that "/tmp/out2" is not
            # mistaken for something inside "/tmp/out".
            root_prefix = os.path.join(out_root, '')
            for path in zf.namelist():
                if path.startswith('./'):
                    tgt = os.path.join(out, path[2:])
                else:
                    tgt = os.path.join(out, path)

                # Member names come from the archive and are untrusted:
                # refuse "../" tricks and absolute names that would land
                # outside the target directory.
                dest = os.path.abspath(tgt)
                if dest != out_root and not dest.startswith(root_prefix):
                    print("Skipping %r: it would be extracted outside %r"
                          % (path, out), file=sys.stderr)
                    continue

                if path.endswith('/'):
                    # Directory entry: create it, there is no data to write.
                    if not os.path.isdir(tgt):
                        os.makedirs(tgt)
                    continue

                tgtdir = os.path.dirname(tgt)
                # tgtdir is '' when extracting a top-level member into
                # the current directory given as ''; nothing to create.
                if tgtdir and not os.path.exists(tgtdir):
                    os.makedirs(tgtdir)
                with io.open(tgt, 'wb') as fp:
                    fp.write(zf.read(path))
        finally:
            zf.close()

    elif args[0] == '-c':
        if len(args) < 3:
            print(USAGE)
            sys.exit(1)

        def addToZip(zf, path, zippath):
            # Add a file, or recursively the contents of a directory;
            # anything else (missing path, special file) is ignored.
            if os.path.isfile(path):
                zf.write(path, zippath, ZIP_DEFLATED)
            elif os.path.isdir(path):
                for nm in os.listdir(path):
                    addToZip(zf,
                            os.path.join(path, nm), os.path.join(zippath, nm))
            # else: ignore

        zf = ZipFile(args[1], 'w', allowZip64=True)
        try:
            for src in args[2:]:
                addToZip(zf, src, os.path.basename(src))
        finally:
            zf.close()
1416
# Script entry point: when this file is executed directly rather than
# imported, run the command-line interface.  main() is called without
# arguments, so it reads them from sys.argv.
if __name__ == "__main__":
    main()