# blob: 496cf83fda6f6cd4c0762e17d71341fc1481b6bf
"""
Read and write ZIP files.
"""
Georg Brandl62416bc2008-01-07 18:47:44 +00004import struct, os, time, sys, shutil
Martin v. Löwis0dfcfc82009-01-24 14:00:33 +00005import binascii, cStringIO, stat
Antoine Pitrou94c33eb2010-01-27 20:59:50 +00006import io
7import re
Guido van Rossum32abe6f2000-03-31 17:30:02 +00008
# zlib is optional: without it only ZIP_STORED archives can be handled, and
# the CRC-32 implementation falls back to the one in binascii.
try:
    import zlib  # We may need its compression method
    crc32 = zlib.crc32
except ImportError:
    zlib = None
    crc32 = binascii.crc32
Guido van Rossum32abe6f2000-03-31 17:30:02 +000015
# Names exported by "from zipfile import *".
__all__ = ["BadZipfile", "error", "ZIP_STORED", "ZIP_DEFLATED", "is_zipfile",
           "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile" ]
Skip Montanaro40fc1602001-03-01 04:27:19 +000018
class BadZipfile(Exception):
    """Raised by this module for invalid or unsupported ZIP data.

    Examples in this file are a CRC-32 mismatch while reading a member and
    an archive that spans multiple disks.
    """


class LargeZipFile(Exception):
    """
    Raised when writing a zipfile, the zipfile requires ZIP64 extensions
    and those extensions are disabled.
    """


error = BadZipfile      # The exception raised by this module
Guido van Rossum32abe6f2000-03-31 17:30:02 +000030
# Sizes above ZIP64_LIMIT are written using the ZIP64 extension
# (see ZipInfo.FileHeader).
ZIP64_LIMIT = (1 << 31) - 1
ZIP_FILECOUNT_LIMIT = 1 << 16
# NOTE(review): presumably the longest archive comment, whose length is stored
# in a 16-bit field of the "end of central directory" record -- confirm at the
# point of use.
ZIP_MAX_COMMENT = (1 << 16) - 1

# constants for Zip file compression methods
ZIP_STORED = 0
ZIP_DEFLATED = 8
# Other ZIP compression methods not supported
39
Martin v. Löwis8c436412008-07-03 12:51:14 +000040# Below are some formats and associated data for reading/writing headers using
41# the struct module. The names and structures of headers/records are those used
42# in the PKWARE description of the ZIP file format:
43# http://www.pkware.com/documents/casestudies/APPNOTE.TXT
44# (URL valid as of January 2008)
Ronald Oussoren143cefb2006-06-15 08:14:18 +000045
Martin v. Löwis8c436412008-07-03 12:51:14 +000046# The "end of central directory" structure, magic number, size, and indices
47# (section V.I in the format document)
Amaury Forgeot d'Arcae6d2b92008-07-11 21:28:25 +000048structEndArchive = "<4s4H2LH"
49stringEndArchive = "PK\005\006"
50sizeEndCentDir = struct.calcsize(structEndArchive)
Martin v. Löwis8c436412008-07-03 12:51:14 +000051
52_ECD_SIGNATURE = 0
53_ECD_DISK_NUMBER = 1
54_ECD_DISK_START = 2
55_ECD_ENTRIES_THIS_DISK = 3
56_ECD_ENTRIES_TOTAL = 4
57_ECD_SIZE = 5
58_ECD_OFFSET = 6
59_ECD_COMMENT_SIZE = 7
60# These last two indices are not part of the structure as defined in the
61# spec, but they are used internally by this module as a convenience
62_ECD_COMMENT = 8
63_ECD_LOCATION = 9
64
65# The "central directory" structure, magic number, size, and indices
66# of entries in the structure (section V.F in the format document)
67structCentralDir = "<4s4B4HL2L5H2L"
Amaury Forgeot d'Arcae6d2b92008-07-11 21:28:25 +000068stringCentralDir = "PK\001\002"
Martin v. Löwis8c436412008-07-03 12:51:14 +000069sizeCentralDir = struct.calcsize(structCentralDir)
70
Fred Drake3e038e52001-02-28 17:56:26 +000071# indexes of entries in the central directory structure
72_CD_SIGNATURE = 0
73_CD_CREATE_VERSION = 1
74_CD_CREATE_SYSTEM = 2
75_CD_EXTRACT_VERSION = 3
Martin v. Löwis8c436412008-07-03 12:51:14 +000076_CD_EXTRACT_SYSTEM = 4
Fred Drake3e038e52001-02-28 17:56:26 +000077_CD_FLAG_BITS = 5
78_CD_COMPRESS_TYPE = 6
79_CD_TIME = 7
80_CD_DATE = 8
81_CD_CRC = 9
82_CD_COMPRESSED_SIZE = 10
83_CD_UNCOMPRESSED_SIZE = 11
84_CD_FILENAME_LENGTH = 12
85_CD_EXTRA_FIELD_LENGTH = 13
86_CD_COMMENT_LENGTH = 14
87_CD_DISK_NUMBER_START = 15
88_CD_INTERNAL_FILE_ATTRIBUTES = 16
89_CD_EXTERNAL_FILE_ATTRIBUTES = 17
90_CD_LOCAL_HEADER_OFFSET = 18
91
Martin v. Löwis8c436412008-07-03 12:51:14 +000092# The "local file header" structure, magic number, size, and indices
93# (section V.A in the format document)
94structFileHeader = "<4s2B4HL2L2H"
Amaury Forgeot d'Arcae6d2b92008-07-11 21:28:25 +000095stringFileHeader = "PK\003\004"
Martin v. Löwis8c436412008-07-03 12:51:14 +000096sizeFileHeader = struct.calcsize(structFileHeader)
97
Fred Drake3e038e52001-02-28 17:56:26 +000098_FH_SIGNATURE = 0
99_FH_EXTRACT_VERSION = 1
Martin v. Löwis8c436412008-07-03 12:51:14 +0000100_FH_EXTRACT_SYSTEM = 2
Fred Drake3e038e52001-02-28 17:56:26 +0000101_FH_GENERAL_PURPOSE_FLAG_BITS = 3
102_FH_COMPRESSION_METHOD = 4
103_FH_LAST_MOD_TIME = 5
104_FH_LAST_MOD_DATE = 6
105_FH_CRC = 7
106_FH_COMPRESSED_SIZE = 8
107_FH_UNCOMPRESSED_SIZE = 9
108_FH_FILENAME_LENGTH = 10
109_FH_EXTRA_FIELD_LENGTH = 11
110
Martin v. Löwis8c436412008-07-03 12:51:14 +0000111# The "Zip64 end of central directory locator" structure, magic number, and size
Amaury Forgeot d'Arcae6d2b92008-07-11 21:28:25 +0000112structEndArchive64Locator = "<4sLQL"
113stringEndArchive64Locator = "PK\x06\x07"
114sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator)
Martin v. Löwis8c436412008-07-03 12:51:14 +0000115
116# The "Zip64 end of central directory" record, magic number, size, and indices
117# (section V.G in the format document)
Amaury Forgeot d'Arcae6d2b92008-07-11 21:28:25 +0000118structEndArchive64 = "<4sQ2H2L4Q"
119stringEndArchive64 = "PK\x06\x06"
120sizeEndCentDir64 = struct.calcsize(structEndArchive64)
Martin v. Löwis8c436412008-07-03 12:51:14 +0000121
122_CD64_SIGNATURE = 0
123_CD64_DIRECTORY_RECSIZE = 1
124_CD64_CREATE_VERSION = 2
125_CD64_EXTRACT_VERSION = 3
126_CD64_DISK_NUMBER = 4
127_CD64_DISK_NUMBER_START = 5
128_CD64_NUMBER_ENTRIES_THIS_DISK = 6
129_CD64_NUMBER_ENTRIES_TOTAL = 7
130_CD64_DIRECTORY_SIZE = 8
131_CD64_OFFSET_START_CENTDIR = 9
132
def _check_zipfile(fp):
    """Return True if an "end of central directory" record is found in fp.

    fp is an open, seekable file object.  An IOError raised while probing
    it is treated as "not a ZIP file" and yields False.
    """
    try:
        if _EndRecData(fp):
            return True         # file has correct magic number
    except IOError:
        pass
    return False
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000140
def is_zipfile(filename):
    """Quickly see if a file is a ZIP file by checking the magic number.

    The filename argument may be a file or file-like object too.  Anything
    with a "read" attribute is probed directly; otherwise the argument is
    opened in binary mode and closed again before returning.  An IOError
    (for example an unreadable path) results in False.
    """
    result = False
    try:
        if hasattr(filename, "read"):
            result = _check_zipfile(fp=filename)
        else:
            with open(filename, "rb") as fp:
                result = _check_zipfile(fp)
    except IOError:
        pass
    return result
156
def _EndRecData64(fpin, offset, endrec):
    """Read the ZIP64 end-of-archive records and use them to update endrec.

    fpin is a seekable file object, offset is the position of the classic
    "end of central directory" record relative to the end of the file (a
    negative number) and endrec is the list built from that record.

    endrec is returned unchanged when no complete ZIP64 locator and record
    precede the classic record; otherwise its first seven entries are
    overwritten in place with the ZIP64 values and it is returned.

    Raises BadZipfile for archives that span multiple disks.
    """
    try:
        fpin.seek(offset - sizeEndCentDir64Locator, 2)
    except IOError:
        # If the seek fails, the file is not large enough to contain a ZIP64
        # end-of-archive locator, so keep the end record we were given.
        return endrec

    data = fpin.read(sizeEndCentDir64Locator)
    if len(data) != sizeEndCentDir64Locator:
        # Short read: unpacking would raise struct.error.
        return endrec
    sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, data)
    if sig != stringEndArchive64Locator:
        return endrec

    if diskno != 0 or disks != 1:
        raise BadZipfile("zipfiles that span multiple disks are not supported")

    # Assume no 'zip64 extensible data'
    try:
        fpin.seek(offset - sizeEndCentDir64Locator - sizeEndCentDir64, 2)
    except IOError:
        return endrec
    data = fpin.read(sizeEndCentDir64)
    if len(data) != sizeEndCentDir64:
        return endrec
    sig, sz, create_version, read_version, disk_num, disk_dir, \
            dircount, dircount2, dirsize, diroffset = \
            struct.unpack(structEndArchive64, data)
    if sig != stringEndArchive64:
        return endrec

    # Update the original endrec using data from the ZIP64 record
    endrec[_ECD_SIGNATURE] = sig
    endrec[_ECD_DISK_NUMBER] = disk_num
    endrec[_ECD_DISK_START] = disk_dir
    endrec[_ECD_ENTRIES_THIS_DISK] = dircount
    endrec[_ECD_ENTRIES_TOTAL] = dircount2
    endrec[_ECD_SIZE] = dirsize
    endrec[_ECD_OFFSET] = diroffset
    return endrec
188
189
def _EndRecData(fpin):
    """Return data from the "End of Central Directory" record, or None.

    The data is a list of the nine items in the ZIP "End of central dir"
    record followed by a tenth item, the file seek offset of this record.

    fpin must be a seekable file object.  None is returned when no valid
    record can be located, including when a signature is found but the
    bytes that follow are too few to form a complete record."""

    # Determine file size
    fpin.seek(0, 2)
    filesize = fpin.tell()

    # Check to see if this is ZIP file with no archive comment (the
    # "end of central directory" structure should be the last item in the
    # file if this is the case).
    try:
        fpin.seek(-sizeEndCentDir, 2)
    except IOError:
        return None
    data = fpin.read()
    if (len(data) == sizeEndCentDir and
            data[0:4] == stringEndArchive and
            data[-2:] == b"\000\000"):
        # the signature is correct and there's no comment, unpack structure
        endrec = list(struct.unpack(structEndArchive, data))

        # Append a blank comment and record start offset
        endrec.append(b"")
        endrec.append(filesize - sizeEndCentDir)

        # Try to read the "Zip64 end of central directory" structure
        return _EndRecData64(fpin, -sizeEndCentDir, endrec)

    # Either this is not a ZIP file, or it is a ZIP file with an archive
    # comment.  Search the end of the file for the "end of central directory"
    # record signature. The comment is the last item in the ZIP file and may be
    # up to 64K long.  It is assumed that the "end of central directory" magic
    # number does not appear in the comment.
    maxCommentStart = max(filesize - (1 << 16) - sizeEndCentDir, 0)
    fpin.seek(maxCommentStart, 0)
    data = fpin.read()
    start = data.rfind(stringEndArchive)
    if start >= 0:
        # found the magic number; attempt to unpack and interpret
        recData = data[start:start+sizeEndCentDir]
        if len(recData) != sizeEndCentDir:
            # The signature sits too close to the end of the file for a
            # complete record; unpacking would raise struct.error.
            return None
        endrec = list(struct.unpack(structEndArchive, recData))
        comment = data[start+sizeEndCentDir:]
        # check that comment length is correct
        if endrec[_ECD_COMMENT_SIZE] == len(comment):
            # Append the archive comment and start offset
            endrec.append(comment)
            endrec.append(maxCommentStart + start)

            # Try to read the "Zip64 end of central directory" structure
            return _EndRecData64(fpin, maxCommentStart + start - filesize,
                                 endrec)

    # Unable to find a valid end of central directory structure
    return None
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000246
Fred Drake484d7352000-10-02 21:14:52 +0000247
class ZipInfo (object):
    """Class with attributes describing each file in the ZIP archive."""

    __slots__ = (
            'orig_filename',
            'filename',
            'date_time',
            'compress_type',
            'comment',
            'extra',
            'create_system',
            'create_version',
            'extract_version',
            'reserved',
            'flag_bits',
            'volume',
            'internal_attr',
            'external_attr',
            'header_offset',
            'CRC',
            'compress_size',
            'file_size',
            '_raw_time',
        )

    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
        """Create an entry for *filename* last modified at *date_time*.

        date_time is a (year, month, day, hour, min, sec) sequence.  The
        name is cut at the first null byte and os.sep is replaced by "/";
        the unmodified name is kept in orig_filename.
        """
        self.orig_filename = filename   # Original file name in archive

        # Terminate the file name at the first null byte.  Null bytes in file
        # names are used as tricks by viruses in archives.
        null_byte = filename.find(chr(0))
        if null_byte >= 0:
            filename = filename[0:null_byte]
        # This is used to ensure paths in generated ZIP files always use
        # forward slashes as the directory separator, as required by the
        # ZIP format specification.
        if os.sep != "/" and os.sep in filename:
            filename = filename.replace(os.sep, "/")

        self.filename = filename        # Normalized file name
        self.date_time = date_time      # year, month, day, hour, min, sec
        # Standard values:
        self.compress_type = ZIP_STORED # Type of compression for the file
        self.comment = ""               # Comment for each file
        self.extra = ""                 # ZIP extra data
        if sys.platform == 'win32':
            self.create_system = 0          # System which created ZIP archive
        else:
            # Assume everything else is unix-y
            self.create_system = 3          # System which created ZIP archive
        self.create_version = 20        # Version which created ZIP archive
        self.extract_version = 20       # Version needed to extract archive
        self.reserved = 0               # Must be zero
        self.flag_bits = 0              # ZIP flag bits
        self.volume = 0                 # Volume number of file header
        self.internal_attr = 0          # Internal attributes
        self.external_attr = 0          # External file attributes
        # Other attributes are set by class ZipFile:
        # header_offset         Byte offset to the file header
        # CRC                   CRC-32 of the uncompressed file
        # compress_size         Size of the compressed file
        # file_size             Size of the uncompressed file

    def FileHeader(self):
        """Return the per-file header as a string.

        When either size exceeds ZIP64_LIMIT a ZIP64 extra record carrying
        the real sizes is appended to the returned extra data, the header
        size fields are set to 0xffffffff, and extract_version and
        create_version are each raised to at least 45 on this object.
        """
        dt = self.date_time
        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
        if self.flag_bits & 0x08:
            # Set these to zero because we write them after the file data
            CRC = compress_size = file_size = 0
        else:
            CRC = self.CRC
            compress_size = self.compress_size
            file_size = self.file_size

        extra = self.extra

        if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
            # File is larger than what fits into a 4 byte integer,
            # fall back to the ZIP64 extension
            fmt = '<HHQQ'
            extra = extra + struct.pack(fmt,
                    1, struct.calcsize(fmt)-4, file_size, compress_size)
            file_size = 0xffffffff
            compress_size = 0xffffffff
            self.extract_version = max(45, self.extract_version)
            # Raise, never lower, the creator version: it is bumped from its
            # own current value rather than from extract_version.
            self.create_version = max(45, self.create_version)

        filename, flag_bits = self._encodeFilenameFlags()
        header = struct.pack(structFileHeader, stringFileHeader,
                 self.extract_version, self.reserved, flag_bits,
                 self.compress_type, dostime, dosdate, CRC,
                 compress_size, file_size,
                 len(filename), len(extra))
        return header + filename + extra

    def _encodeFilenameFlags(self):
        """Return (encoded filename, flag bits) for writing a header.

        A unicode name is encoded as ASCII when possible; otherwise it is
        encoded as UTF-8 and bit 0x800 is set in the returned flags.  A
        byte-string name is returned untouched with the current flags.
        """
        if isinstance(self.filename, unicode):
            try:
                return self.filename.encode('ascii'), self.flag_bits
            except UnicodeEncodeError:
                return self.filename.encode('utf-8'), self.flag_bits | 0x800
        else:
            return self.filename, self.flag_bits

    def _decodeFilename(self):
        """Return the filename, decoded from UTF-8 if flag bit 0x800 is set."""
        if self.flag_bits & 0x800:
            return self.filename.decode('utf-8')
        else:
            return self.filename

    def _decodeExtra(self):
        """Apply ZIP64 values found in the extra field to this entry.

        The extra field is walked as a sequence of (type, length, payload)
        records.  For a type 1 record, each of file_size, compress_size and
        header_offset that holds the 0xffffffff placeholder is replaced, in
        that order, by the next 64-bit value of the payload.

        Raises RuntimeError when a type 1 record has an unexpected length.
        """
        # Try to decode the extra field.
        extra = self.extra
        unpack = struct.unpack
        while extra:
            tp, ln = unpack('<HH', extra[:4])
            if tp == 1:
                if ln >= 24:
                    counts = unpack('<QQQ', extra[4:28])
                elif ln == 16:
                    counts = unpack('<QQ', extra[4:20])
                elif ln == 8:
                    counts = unpack('<Q', extra[4:12])
                elif ln == 0:
                    counts = ()
                else:
                    raise RuntimeError("Corrupt extra field %s" % (ln,))

                idx = 0

                # ZIP64 extension (large files and/or large archives)
                if self.file_size in (0xffffffffffffffff, 0xffffffff):
                    self.file_size = counts[idx]
                    idx += 1

                if self.compress_size == 0xFFFFFFFF:
                    self.compress_size = counts[idx]
                    idx += 1

                if self.header_offset == 0xffffffff:
                    self.header_offset = counts[idx]
                    idx += 1

            # Skip this record's 4-byte header and its payload.
            extra = extra[ln+4:]
Tim Petersa608bb22006-06-15 18:06:29 +0000395
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000396
Martin v. Löwisc6d626e2007-02-13 09:49:38 +0000397class _ZipDecrypter:
398 """Class to handle decryption of files stored within a ZIP archive.
399
400 ZIP supports a password-based form of encryption. Even though known
401 plaintext attacks have been found against it, it is still useful
Gregory P. Smithda407232008-01-20 01:32:00 +0000402 to be able to get data out of such a file.
Martin v. Löwisc6d626e2007-02-13 09:49:38 +0000403
404 Usage:
405 zd = _ZipDecrypter(mypwd)
406 plain_char = zd(cypher_char)
407 plain_text = map(zd, cypher_text)
408 """
409
410 def _GenerateCRCTable():
411 """Generate a CRC-32 table.
412
413 ZIP encryption uses the CRC32 one-byte primitive for scrambling some
414 internal keys. We noticed that a direct implementation is faster than
415 relying on binascii.crc32().
416 """
417 poly = 0xedb88320
418 table = [0] * 256
419 for i in range(256):
420 crc = i
421 for j in range(8):
422 if crc & 1:
423 crc = ((crc >> 1) & 0x7FFFFFFF) ^ poly
424 else:
425 crc = ((crc >> 1) & 0x7FFFFFFF)
426 table[i] = crc
427 return table
428 crctable = _GenerateCRCTable()
429
430 def _crc32(self, ch, crc):
431 """Compute the CRC32 primitive on one byte."""
432 return ((crc >> 8) & 0xffffff) ^ self.crctable[(crc ^ ord(ch)) & 0xff]
433
434 def __init__(self, pwd):
435 self.key0 = 305419896
436 self.key1 = 591751049
437 self.key2 = 878082192
438 for p in pwd:
439 self._UpdateKeys(p)
440
441 def _UpdateKeys(self, c):
442 self.key0 = self._crc32(c, self.key0)
443 self.key1 = (self.key1 + (self.key0 & 255)) & 4294967295
444 self.key1 = (self.key1 * 134775813 + 1) & 4294967295
445 self.key2 = self._crc32(chr((self.key1 >> 24) & 255), self.key2)
446
447 def __call__(self, c):
448 """Decrypt a single character."""
449 c = ord(c)
450 k = self.key2 | 2
451 c = c ^ (((k * (k^1)) >> 8) & 255)
452 c = chr(c)
453 self._UpdateKeys(c)
454 return c
455
class ZipExtFile(io.BufferedIOBase):
    """File-like object for reading an archive member.
       Is returned by ZipFile.open().
    """

    # Max size supported by decompressor.  The parentheses matter: without
    # them "1 << 31 - 1" is parsed as "1 << 30".
    MAX_N = (1 << 31) - 1

    # Read from compressed files in 4k blocks.
    MIN_READ_SIZE = 4096

    # Search for universal newlines or line chunks.
    PATTERN = re.compile(r'^(?P<chunk>[^\r\n]+)|(?P<newline>\n|\r\n?)')

    def __init__(self, fileobj, mode, zipinfo, decrypter=None):
        """Wrap fileobj, positioned at the start of the member's data.

        mode is stored as-is; a 'U' in it enables universal newline
        handling in readline().  zipinfo supplies compress_type,
        compress_size, filename and, when present, the expected CRC.
        decrypter, if given, is called on every character read.
        """
        self._fileobj = fileobj
        self._decrypter = decrypter

        self._compress_type = zipinfo.compress_type
        self._compress_size = zipinfo.compress_size
        self._compress_left = zipinfo.compress_size

        if self._compress_type == ZIP_DEFLATED:
            self._decompressor = zlib.decompressobj(-15)
        # Compressed bytes read from the file but not yet decompressed.
        self._unconsumed = ''

        self._readbuffer = ''
        self._offset = 0

        self._universal = 'U' in mode
        self.newlines = None

        # Adjust read size for encrypted files since the first 12 bytes
        # are for the encryption/password information.
        if self._decrypter is not None:
            self._compress_left -= 12

        self.mode = mode
        self.name = zipinfo.filename

        if hasattr(zipinfo, 'CRC'):
            self._expected_crc = zipinfo.CRC
            self._running_crc = crc32(b'') & 0xffffffff
        else:
            self._expected_crc = None

    def readline(self, limit=-1):
        """Read and return a line from the stream.

        If limit is specified, at most limit bytes will be read.
        """

        if not self._universal and limit < 0:
            # Shortcut common case - newline found in buffer.
            i = self._readbuffer.find('\n', self._offset) + 1
            if i > 0:
                line = self._readbuffer[self._offset: i]
                self._offset = i
                return line

        if not self._universal:
            return io.BufferedIOBase.readline(self, limit)

        line = ''
        while limit < 0 or len(line) < limit:
            readahead = self.peek(2)
            if readahead == '':
                return line

            #
            # Search for universal newlines or line chunks.
            #
            # The pattern returns either a line chunk or a newline, but not
            # both. Combined with peek(2), we are assured that the sequence
            # '\r\n' is always retrieved completely and never split into
            # separate newlines - '\r', '\n' due to coincidental readaheads.
            #
            match = self.PATTERN.search(readahead)
            newline = match.group('newline')
            if newline is not None:
                if self.newlines is None:
                    self.newlines = []
                if newline not in self.newlines:
                    self.newlines.append(newline)
                self._offset += len(newline)
                return line + '\n'

            chunk = match.group('chunk')
            if limit >= 0:
                chunk = chunk[: limit - len(line)]

            self._offset += len(chunk)
            line += chunk

        return line

    def peek(self, n=1):
        """Returns buffered bytes without advancing the position."""
        if n > len(self._readbuffer) - self._offset:
            # Fill the buffer through read(), then rewind over what it
            # consumed so the position is unchanged.
            chunk = self.read(n)
            self._offset -= len(chunk)

        # Return up to 512 bytes to reduce allocation overhead for tight loops.
        return self._readbuffer[self._offset: self._offset + 512]

    def readable(self):
        """Return True: this object supports reading."""
        return True

    def read(self, n=-1):
        """Read and return up to n bytes.
        If the argument is omitted, None, or negative, data is read and returned until EOF is reached.
        """
        buf = ''
        if n is None:
            n = -1
        while True:
            if n < 0:
                data = self.read1(n)
            elif n > len(buf):
                data = self.read1(n - len(buf))
            else:
                return buf
            if len(data) == 0:
                return buf
            buf += data

    def _update_crc(self, newdata, eof):
        """Fold newdata into the running CRC; verify it when eof is true.

        Raises BadZipfile if the final CRC differs from the expected one.
        """
        # Update the CRC using the given data.
        if self._expected_crc is None:
            # No need to compute the CRC if we don't have a reference value
            return
        self._running_crc = crc32(newdata, self._running_crc) & 0xffffffff
        # Check the CRC if we're at the end of the file
        if eof and self._running_crc != self._expected_crc:
            raise BadZipfile("Bad CRC-32 for file %r" % self.name)

    def read1(self, n):
        """Read up to n bytes with at most one read() system call."""

        # Simplify algorithm (branching) by transforming negative n to large n.
        # None is tested first so that it is never compared with an integer.
        if n is None or n < 0:
            n = self.MAX_N

        # Bytes available in read buffer.
        len_readbuffer = len(self._readbuffer) - self._offset

        # Read from file.
        if self._compress_left > 0 and n > len_readbuffer + len(self._unconsumed):
            nbytes = n - len_readbuffer - len(self._unconsumed)
            nbytes = max(nbytes, self.MIN_READ_SIZE)
            nbytes = min(nbytes, self._compress_left)

            data = self._fileobj.read(nbytes)
            self._compress_left -= len(data)

            if data and self._decrypter is not None:
                data = ''.join(map(self._decrypter, data))

            if self._compress_type == ZIP_STORED:
                self._update_crc(data, eof=(self._compress_left==0))
                self._readbuffer = self._readbuffer[self._offset:] + data
                self._offset = 0
            else:
                # Prepare deflated bytes for decompression.
                self._unconsumed += data

        # Handle unconsumed data.
        if (len(self._unconsumed) > 0 and n > len_readbuffer and
            self._compress_type == ZIP_DEFLATED):
            data = self._decompressor.decompress(
                self._unconsumed,
                max(n - len_readbuffer, self.MIN_READ_SIZE)
            )

            self._unconsumed = self._decompressor.unconsumed_tail
            eof = len(self._unconsumed) == 0 and self._compress_left == 0
            if eof:
                data += self._decompressor.flush()

            self._update_crc(data, eof=eof)
            self._readbuffer = self._readbuffer[self._offset:] + data
            self._offset = 0

        # Read from buffer.
        data = self._readbuffer[self._offset: self._offset + n]
        self._offset += len(data)
        return data
643
Tim Petersea5962f2007-03-12 18:07:52 +0000644
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000645
class ZipFile:
    """ Class with methods to open, read, write, close, list zip files.

    z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=False)

    file: Either the path to the file, or a file-like object.
          If it is a path, the file will be opened and closed by ZipFile.
    mode: The mode can be either read "r", write "w" or append "a".
    compression: ZIP_STORED (no compression) or ZIP_DEFLATED (requires zlib).
    allowZip64: if True ZipFile will create files with ZIP64 extensions when
                needed, otherwise it will raise an exception when this would
                be necessary.

    """

    fp = None                   # Set here since __del__ checks it

    def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=False):
        """Open the ZIP file with mode read "r", write "w" or append "a".

        Raises RuntimeError for an unknown mode or compression method, or
        when ZIP_DEFLATED is requested but zlib could not be imported.
        """
        if mode not in ("r", "w", "a"):
            raise RuntimeError('ZipFile() requires mode "r", "w", or "a"')

        if compression == ZIP_STORED:
            pass
        elif compression == ZIP_DEFLATED:
            if not zlib:
                raise RuntimeError(
                    "Compression requires the (missing) zlib module")
        else:
            raise RuntimeError("That compression method is not supported")

        self._allowZip64 = allowZip64
        self._didModify = False
        self.debug = 0  # Level of printing: 0 through 3
        self.NameToInfo = {}    # Find file info given name
        self.filelist = []      # List of ZipInfo instances for archive
        self.compression = compression  # Method of compression
        self.mode = key = mode.replace('b', '')[0]
        self.pwd = None
        self.comment = ''

        # Check if we were passed a file-like object
        if isinstance(file, basestring):
            self._filePassed = 0
            self.filename = file
            modeDict = {'r' : 'rb', 'w': 'wb', 'a' : 'r+b'}
            try:
                self.fp = open(file, modeDict[mode])
            except IOError:
                # Appending to a file that cannot be opened for update
                # falls back to creating it.
                if mode == 'a':
                    mode = key = 'w'
                    self.fp = open(file, modeDict[mode])
                else:
                    raise
        else:
            self._filePassed = 1
            self.fp = file
            self.filename = getattr(file, 'name', None)

        if key == 'r':
            self._GetContents()
        elif key == 'w':
            pass
        elif key == 'a':
            try:                        # See if file is a zip file
                self._RealGetContents()
                # seek to start of directory and overwrite
                self.fp.seek(self.start_dir, 0)
            except BadZipfile:          # file is not a zip file, just append
                self.fp.seek(0, 2)
        else:
            if not self._filePassed:
                self.fp.close()
                self.fp = None
            raise RuntimeError('Mode must be "r", "w" or "a"')

    def __enter__(self):
        """Context-manager entry: return the archive itself."""
        return self

    def __exit__(self, type, value, traceback):
        """Context-manager exit: close the archive."""
        self.close()

    def _GetContents(self):
        """Read the directory, making sure we close the file if the format
        is bad."""
        try:
            self._RealGetContents()
        except BadZipfile:
            # Only close handles this object opened itself.
            if not self._filePassed:
                self.fp.close()
                self.fp = None
            raise

    def _RealGetContents(self):
        """Read in the table of contents for the ZIP file.

        Fills self.filelist / self.NameToInfo and sets self.comment and
        self.start_dir.  Raises BadZipfile when no end-of-archive record
        is found or the central directory is truncated or malformed.
        """
        fp = self.fp
        endrec = _EndRecData(fp)
        if not endrec:
            raise BadZipfile("File is not a zip file")
        if self.debug > 1:
            print(endrec)
        size_cd = endrec[_ECD_SIZE]             # bytes in central directory
        offset_cd = endrec[_ECD_OFFSET]         # offset of central directory
        self.comment = endrec[_ECD_COMMENT]     # archive comment

        # "concat" is zero, unless zip was concatenated to another file
        concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
        if endrec[_ECD_SIGNATURE] == stringEndArchive64:
            # If Zip64 extension structures are present, account for them
            concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)

        if self.debug > 2:
            inferred = concat + offset_cd
            print("given, inferred, offset %s %s %s"
                  % (offset_cd, inferred, concat))
        # self.start_dir:  Position of start of central directory
        self.start_dir = offset_cd + concat
        fp.seek(self.start_dir, 0)
        data = fp.read(size_cd)
        # Parse the directory from memory rather than from the file.
        fp = cStringIO.StringIO(data)
        total = 0
        while total < size_cd:
            centdir = fp.read(sizeCentralDir)
            # A short read would otherwise surface as a raw struct.error.
            if len(centdir) != sizeCentralDir:
                raise BadZipfile("Truncated central directory")
            if centdir[0:4] != stringCentralDir:
                raise BadZipfile("Bad magic number for central directory")
            centdir = struct.unpack(structCentralDir, centdir)
            if self.debug > 2:
                print(centdir)
            filename = fp.read(centdir[_CD_FILENAME_LENGTH])
            # Create ZipInfo instance to store file information
            x = ZipInfo(filename)
            x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
            x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
            x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
            (x.create_version, x.create_system, x.extract_version, x.reserved,
                x.flag_bits, x.compress_type, t, d,
                x.CRC, x.compress_size, x.file_size) = centdir[1:12]
            x.volume, x.internal_attr, x.external_attr = centdir[15:18]
            # Convert date/time code to (year, month, day, hour, min, sec)
            x._raw_time = t
            x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
                                     t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )

            x._decodeExtra()
            x.header_offset = x.header_offset + concat
            x.filename = x._decodeFilename()
            self.filelist.append(x)
            self.NameToInfo[x.filename] = x

            # update total bytes read from central directory
            total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH]
                     + centdir[_CD_EXTRA_FIELD_LENGTH]
                     + centdir[_CD_COMMENT_LENGTH])

            if self.debug > 2:
                print("total %s" % (total,))

    def namelist(self):
        """Return a list of file names in the archive."""
        return [data.filename for data in self.filelist]

    def infolist(self):
        """Return a list of class ZipInfo instances for files in the
        archive."""
        return self.filelist

    def printdir(self):
        """Print a table of contents for the zip file."""
        print("%-46s %19s %12s" % ("File Name", "Modified ", "Size"))
        for zinfo in self.filelist:
            date = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time[:6]
            print("%-46s %s %12d" % (zinfo.filename, date, zinfo.file_size))

    def testzip(self):
        """Read all the files and check the CRC.

        Returns the name of the first member whose read raises BadZipfile,
        or None when every member reads cleanly.
        """
        chunk_size = 2 ** 20
        for zinfo in self.filelist:
            try:
                # Read by chunks, to avoid an OverflowError or a
                # MemoryError with very large embedded files.
                f = self.open(zinfo.filename, "r")
                while f.read(chunk_size):     # Check CRC-32
                    pass
            except BadZipfile:
                return zinfo.filename

    def getinfo(self, name):
        """Return the instance of ZipInfo given 'name'.

        Raises KeyError when the archive has no member of that name.
        """
        info = self.NameToInfo.get(name)
        if info is None:
            raise KeyError(
                'There is no item named %r in the archive' % name)

        return info

    def setpassword(self, pwd):
        """Set default password for encrypted files."""
        self.pwd = pwd

    def read(self, name, pwd=None):
        """Return file bytes (as a string) for name."""
        return self.open(name, "r", pwd).read()

    def open(self, name, mode="r", pwd=None):
        """Return file-like object for 'name'.

        'name' may be a member name or a ZipInfo instance.  'pwd' overrides
        the default password for encrypted members.  Raises RuntimeError
        for a bad mode, a closed archive, or a missing/incorrect password,
        BadZipfile for an inconsistent local header, and KeyError when the
        named member does not exist.
        """
        if mode not in ("r", "U", "rU"):
            raise RuntimeError('open() requires mode "r", "U", or "rU"')
        if not self.fp:
            raise RuntimeError(
                "Attempt to read ZIP archive that was already closed")

        # Only open a new file for instances where we were not
        # given a file object in the constructor
        if self._filePassed:
            zef_file = self.fp
            should_close = False
        else:
            zef_file = open(self.filename, 'rb')
            should_close = True

        try:
            # Make sure we have an info object
            if isinstance(name, ZipInfo):
                # 'name' is already an info object
                zinfo = name
            else:
                # Get info object for name
                zinfo = self.getinfo(name)

            zef_file.seek(zinfo.header_offset, 0)

            # Skip the file header:
            fheader = zef_file.read(sizeFileHeader)
            if fheader[0:4] != stringFileHeader:
                raise BadZipfile("Bad magic number for file header")

            fheader = struct.unpack(structFileHeader, fheader)
            fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
            if fheader[_FH_EXTRA_FIELD_LENGTH]:
                zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])

            if fname != zinfo.orig_filename:
                raise BadZipfile(
                    'File name in directory "%s" and header "%s" differ.' % (
                        zinfo.orig_filename, fname))

            # check for encrypted flag & handle password
            is_encrypted = zinfo.flag_bits & 0x1
            zd = None
            if is_encrypted:
                if not pwd:
                    pwd = self.pwd
                if not pwd:
                    raise RuntimeError("File %s is encrypted, "
                                       "password required for extraction"
                                       % name)

                zd = _ZipDecrypter(pwd)
                # The first 12 bytes in the cypher stream is an encryption
                # header used to strengthen the algorithm. The first 11
                # bytes are completely random, while the 12th contains the
                # MSB of the CRC, or the MSB of the file time depending on
                # the header type and is used to check the correctness of
                # the password.
                crypt_header = zef_file.read(12)
                h = map(zd, crypt_header[0:12])
                if zinfo.flag_bits & 0x8:
                    # compare against the file type from extended local
                    # headers
                    check_byte = (zinfo._raw_time >> 8) & 0xff
                else:
                    # compare against the CRC otherwise
                    check_byte = (zinfo.CRC >> 24) & 0xff
                if ord(h[11]) != check_byte:
                    raise RuntimeError("Bad password for file", name)

            return ZipExtFile(zef_file, mode, zinfo, zd)
        except BaseException:
            # Do not leak the private handle opened above when lookup or
            # header validation fails; a caller-supplied handle is left
            # alone.
            if should_close:
                zef_file.close()
            raise

    def extract(self, member, path=None, pwd=None):
        """Extract a member from the archive to the current working directory,
           using its full name. Its file information is extracted as accurately
           as possible. `member' may be a filename or a ZipInfo object. You can
           specify a different directory using `path'.

           Returns the path of the file or directory that was created.
        """
        if not isinstance(member, ZipInfo):
            member = self.getinfo(member)

        if path is None:
            path = os.getcwd()

        return self._extract_member(member, path, pwd)

    def extractall(self, path=None, members=None, pwd=None):
        """Extract all members from the archive to the current working
           directory. `path' specifies a different directory to extract to.
           `members' is optional and must be a subset of the list returned
           by namelist().
        """
        if members is None:
            members = self.namelist()

        for zipinfo in members:
            self.extract(zipinfo, path, pwd)

    def _extract_member(self, member, targetpath, pwd):
        """Extract the ZipInfo object 'member' to a physical
           file on the path targetpath.

           The member name is treated as untrusted: drive prefixes, leading
           separators and "." / ".." components are dropped so the result
           always lies below targetpath.  Returns the path that was written.
        """
        # Strip trailing path separator, unless it represents the root.
        if (targetpath[-1:] in (os.path.sep, os.path.altsep)
                and len(os.path.splitdrive(targetpath)[1]) > 1):
            targetpath = targetpath[:-1]

        # Build the destination pathname, replacing forward slashes with
        # platform specific separators.
        arcname = member.filename.replace('/', os.path.sep)
        if os.path.altsep:
            arcname = arcname.replace(os.path.altsep, os.path.sep)
        # Interpret absolute pathnames as relative: remove any drive prefix,
        # empty components (leading or doubled separators), "." and "..".
        arcname = os.path.splitdrive(arcname)[1]
        arcname = os.path.sep.join(
            part for part in arcname.split(os.path.sep)
            if part not in ('', os.path.curdir, os.path.pardir))

        targetpath = os.path.normpath(os.path.join(targetpath, arcname))

        # Create all upper directories if necessary.
        upperdirs = os.path.dirname(targetpath)
        if upperdirs and not os.path.exists(upperdirs):
            os.makedirs(upperdirs)

        # A name ending in "/" denotes a directory entry.
        if member.filename.endswith('/'):
            if not os.path.isdir(targetpath):
                os.mkdir(targetpath)
            return targetpath

        source = self.open(member, pwd=pwd)
        try:
            target = open(targetpath, "wb")
            try:
                shutil.copyfileobj(source, target)
            finally:
                target.close()
        finally:
            # Close both ends even when the copy fails part-way.
            source.close()

        return targetpath

    def _writecheck(self, zinfo):
        """Check for errors before writing a file to the archive.

        Raises RuntimeError for a wrong mode, a closed archive or an
        unusable compression method, and LargeZipFile when ZIP64 would be
        required but is not allowed.
        """
        if zinfo.filename in self.NameToInfo:
            if self.debug:      # Warning for duplicate names
                print("Duplicate name: %s" % (zinfo.filename,))
        if self.mode not in ("w", "a"):
            raise RuntimeError('write() requires mode "w" or "a"')
        if not self.fp:
            raise RuntimeError(
                "Attempt to write ZIP archive that was already closed")
        if zinfo.compress_type == ZIP_DEFLATED and not zlib:
            raise RuntimeError(
                "Compression requires the (missing) zlib module")
        if zinfo.compress_type not in (ZIP_STORED, ZIP_DEFLATED):
            raise RuntimeError(
                "That compression method is not supported")
        if zinfo.file_size > ZIP64_LIMIT:
            if not self._allowZip64:
                raise LargeZipFile("Filesize would require ZIP64 extensions")
        if zinfo.header_offset > ZIP64_LIMIT:
            if not self._allowZip64:
                raise LargeZipFile("Zipfile size would require ZIP64 extensions")

    def write(self, filename, arcname=None, compress_type=None):
        """Put the bytes from filename into the archive under the name
        arcname.

        'arcname' defaults to 'filename' with any drive prefix and leading
        separators removed.  'compress_type' overrides the archive's
        default compression for this member.  Directories are stored as
        empty entries whose name ends in "/".
        """
        if not self.fp:
            raise RuntimeError(
                  "Attempt to write to ZIP archive that was already closed")

        st = os.stat(filename)
        isdir = stat.S_ISDIR(st.st_mode)
        mtime = time.localtime(st.st_mtime)
        date_time = mtime[0:6]
        # Create ZipInfo instance to store file information
        if arcname is None:
            arcname = filename
        arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
        while arcname[0] in (os.sep, os.altsep):
            arcname = arcname[1:]
        if isdir:
            arcname += '/'
        zinfo = ZipInfo(arcname, date_time)
        zinfo.external_attr = (st[0] & 0xFFFF) << 16      # Unix attributes
        if compress_type is None:
            zinfo.compress_type = self.compression
        else:
            zinfo.compress_type = compress_type

        zinfo.file_size = st.st_size
        zinfo.flag_bits = 0x00
        zinfo.header_offset = self.fp.tell()    # Start of header bytes

        self._writecheck(zinfo)
        self._didModify = True

        if isdir:
            zinfo.file_size = 0
            zinfo.compress_size = 0
            zinfo.CRC = 0
            self.filelist.append(zinfo)
            self.NameToInfo[zinfo.filename] = zinfo
            self.fp.write(zinfo.FileHeader())
            return

        with open(filename, "rb") as fp:
            # Must overwrite CRC and sizes with correct data later
            zinfo.CRC = CRC = 0
            zinfo.compress_size = compress_size = 0
            zinfo.file_size = file_size = 0
            self.fp.write(zinfo.FileHeader())
            if zinfo.compress_type == ZIP_DEFLATED:
                cmpr = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                     zlib.DEFLATED, -15)
            else:
                cmpr = None
            while 1:
                buf = fp.read(1024 * 8)
                if not buf:
                    break
                file_size = file_size + len(buf)
                CRC = crc32(buf, CRC) & 0xffffffff
                if cmpr:
                    buf = cmpr.compress(buf)
                    compress_size = compress_size + len(buf)
                self.fp.write(buf)
        if cmpr:
            buf = cmpr.flush()
            compress_size = compress_size + len(buf)
            self.fp.write(buf)
            zinfo.compress_size = compress_size
        else:
            zinfo.compress_size = file_size
        zinfo.CRC = CRC
        zinfo.file_size = file_size
        # Seek backwards and write CRC and file sizes
        position = self.fp.tell()       # Preserve current position in file
        self.fp.seek(zinfo.header_offset + 14, 0)
        self.fp.write(struct.pack("<LLL", zinfo.CRC, zinfo.compress_size,
              zinfo.file_size))
        self.fp.seek(position, 0)
        self.filelist.append(zinfo)
        self.NameToInfo[zinfo.filename] = zinfo

    def writestr(self, zinfo_or_arcname, bytes, compress_type=None):
        """Write a file into the archive.  The contents is the string
        'bytes'.  'zinfo_or_arcname' is either a ZipInfo instance or
        the name of the file in the archive.

        When given, 'compress_type' overrides the compression method of
        the (new or supplied) ZipInfo.
        """
        if not isinstance(zinfo_or_arcname, ZipInfo):
            zinfo = ZipInfo(filename=zinfo_or_arcname,
                            date_time=time.localtime(time.time())[:6])

            zinfo.compress_type = self.compression
            zinfo.external_attr = 0o600 << 16
        else:
            zinfo = zinfo_or_arcname

        if not self.fp:
            raise RuntimeError(
                  "Attempt to write to ZIP archive that was already closed")

        if compress_type is not None:
            zinfo.compress_type = compress_type

        zinfo.file_size = len(bytes)            # Uncompressed size
        zinfo.header_offset = self.fp.tell()    # Start of header bytes
        self._writecheck(zinfo)
        self._didModify = True
        zinfo.CRC = crc32(bytes) & 0xffffffff       # CRC-32 checksum
        if zinfo.compress_type == ZIP_DEFLATED:
            co = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                 zlib.DEFLATED, -15)
            bytes = co.compress(bytes) + co.flush()
            zinfo.compress_size = len(bytes)    # Compressed size
        else:
            zinfo.compress_size = zinfo.file_size
        self.fp.write(zinfo.FileHeader())
        self.fp.write(bytes)
        self.fp.flush()
        if zinfo.flag_bits & 0x08:
            # Write CRC and file sizes after the file data
            self.fp.write(struct.pack("<LLL", zinfo.CRC, zinfo.compress_size,
                  zinfo.file_size))
        self.filelist.append(zinfo)
        self.NameToInfo[zinfo.filename] = zinfo

    def __del__(self):
        """Call the "close()" method in case the user forgot."""
        self.close()

    def close(self):
        """Close the file, and for mode "w" and "a" write the ending
        records.

        The underlying file is released (and self.fp reset to None) even
        when writing the ending records fails; a file object supplied by
        the caller is never closed here.
        """
        if self.fp is None:
            return

        try:
            if self.mode in ("w", "a") and self._didModify: # write ending records
                count = 0
                pos1 = self.fp.tell()
                for zinfo in self.filelist:         # write central directory
                    count = count + 1
                    dt = zinfo.date_time
                    dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
                    dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
                    # Values that overflow the classic fields are collected
                    # here and emitted as a ZIP64 extra field.
                    extra = []
                    if zinfo.file_size > ZIP64_LIMIT \
                            or zinfo.compress_size > ZIP64_LIMIT:
                        extra.append(zinfo.file_size)
                        extra.append(zinfo.compress_size)
                        file_size = 0xffffffff
                        compress_size = 0xffffffff
                    else:
                        file_size = zinfo.file_size
                        compress_size = zinfo.compress_size

                    if zinfo.header_offset > ZIP64_LIMIT:
                        extra.append(zinfo.header_offset)
                        header_offset = 0xffffffff
                    else:
                        header_offset = zinfo.header_offset

                    extra_data = zinfo.extra
                    if extra:
                        # Append a ZIP64 field to the extra's
                        extra_data = struct.pack(
                                '<HH' + 'Q'*len(extra),
                                1, 8*len(extra), *extra) + extra_data

                        extract_version = max(45, zinfo.extract_version)
                        create_version = max(45, zinfo.create_version)
                    else:
                        extract_version = zinfo.extract_version
                        create_version = zinfo.create_version

                    try:
                        filename, flag_bits = zinfo._encodeFilenameFlags()
                        centdir = struct.pack(structCentralDir,
                         stringCentralDir, create_version,
                         zinfo.create_system, extract_version, zinfo.reserved,
                         flag_bits, zinfo.compress_type, dostime, dosdate,
                         zinfo.CRC, compress_size, file_size,
                         len(filename), len(extra_data), len(zinfo.comment),
                         0, zinfo.internal_attr, zinfo.external_attr,
                         header_offset)
                    except DeprecationWarning:
                        # Dump the values that could not be packed, then
                        # let the warning propagate.
                        sys.stderr.write("%s\n" % ((structCentralDir,
                         stringCentralDir, create_version,
                         zinfo.create_system, extract_version, zinfo.reserved,
                         zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
                         zinfo.CRC, compress_size, file_size,
                         len(zinfo.filename), len(extra_data), len(zinfo.comment),
                         0, zinfo.internal_attr, zinfo.external_attr,
                         header_offset),))
                        raise
                    self.fp.write(centdir)
                    self.fp.write(filename)
                    self.fp.write(extra_data)
                    self.fp.write(zinfo.comment)

                pos2 = self.fp.tell()
                # Write end-of-zip-archive record
                centDirCount = count
                centDirSize = pos2 - pos1
                centDirOffset = pos1
                if (centDirCount >= ZIP_FILECOUNT_LIMIT or
                    centDirOffset > ZIP64_LIMIT or
                    centDirSize > ZIP64_LIMIT):
                    # Need to write the ZIP64 end-of-archive records
                    zip64endrec = struct.pack(
                            structEndArchive64, stringEndArchive64,
                            44, 45, 45, 0, 0, centDirCount, centDirCount,
                            centDirSize, centDirOffset)
                    self.fp.write(zip64endrec)

                    zip64locrec = struct.pack(
                            structEndArchive64Locator,
                            stringEndArchive64Locator, 0, pos2, 1)
                    self.fp.write(zip64locrec)
                    centDirCount = min(centDirCount, 0xFFFF)
                    centDirSize = min(centDirSize, 0xFFFFFFFF)
                    centDirOffset = min(centDirOffset, 0xFFFFFFFF)

                # check for valid comment length; a comment of exactly
                # ZIP_MAX_COMMENT bytes still fits and is left untouched
                if len(self.comment) > ZIP_MAX_COMMENT:
                    if self.debug > 0:
                        msg = 'Archive comment is too long; truncating to %d bytes' \
                              % ZIP_MAX_COMMENT
                        print(msg)
                    self.comment = self.comment[:ZIP_MAX_COMMENT]

                endrec = struct.pack(structEndArchive, stringEndArchive,
                                     0, 0, centDirCount, centDirCount,
                                     centDirSize, centDirOffset, len(self.comment))
                self.fp.write(endrec)
                self.fp.write(self.comment)
                self.fp.flush()
        finally:
            # Always drop the handle so a failed write cannot leave the
            # archive half-open or make __del__ retry the same failure.
            fp = self.fp
            self.fp = None
            if not self._filePassed:
                fp.close()
1244
1245
1246class PyZipFile(ZipFile):
Fred Drake484d7352000-10-02 21:14:52 +00001247 """Class to create ZIP archives with Python library files and packages."""
1248
def writepy(self, pathname, basename = ""):
    """Add all files from "pathname" to the ZIP archive.

    Three cases are handled, depending on what "pathname" is:

    * a package directory (it contains __init__.py): the package's
      __init__ module, every *.py file in it and, recursively, every
      sub-package are added under "basename/<directory name>";
    * any other directory: every *.py file directly inside it is added
      under "basename";
    * otherwise "pathname" must name a *.py file, which is added on its
      own; anything else raises RuntimeError.

    The file actually stored for each module, and its archive name, are
    whatever self._get_codename() returns for it.
    """
    package_name = os.path.split(pathname)[1]

    # Case 3: a single module file.
    if not os.path.isdir(pathname):
        if pathname[-3:] != ".py":
            raise RuntimeError(
                'Files added with writepy() must end with ".py"')
        fname, arcname = self._get_codename(pathname[0:-3], basename)
        if self.debug:
            print("Adding file %s" % (arcname,))
        self.write(fname, arcname)
        return

    initname = os.path.join(pathname, "__init__.py")

    # Case 2: a plain directory, add its modules at top level.
    if not os.path.isfile(initname):
        if self.debug:
            print("Adding files from directory %s" % (pathname,))
        for entry in os.listdir(pathname):
            if os.path.splitext(entry)[1] != ".py":
                continue
            entry_path = os.path.join(pathname, entry)
            fname, arcname = self._get_codename(entry_path[0:-3],
                                                basename)
            if self.debug:
                print("Adding %s" % (arcname,))
            self.write(fname, arcname)
        return

    # Case 1: a package directory; extend the archive prefix with its name.
    if basename:
        basename = "%s/%s" % (basename, package_name)
    else:
        basename = package_name
    if self.debug:
        print("Adding package in %s as %s" % (pathname, basename))
    fname, arcname = self._get_codename(initname[0:-3], basename)
    if self.debug:
        print("Adding %s" % (arcname,))
    self.write(fname, arcname)

    # __init__.py has just been handled; walk everything else.
    entries = os.listdir(pathname)
    entries.remove("__init__.py")
    for entry in entries:
        entry_path = os.path.join(pathname, entry)
        ext = os.path.splitext(entry)[1]
        if os.path.isdir(entry_path):
            if os.path.isfile(os.path.join(entry_path, "__init__.py")):
                # Nested package: recurse with the extended prefix.
                self.writepy(entry_path, basename)
        elif ext == ".py":
            fname, arcname = self._get_codename(entry_path[0:-3],
                                                basename)
            if self.debug:
                print("Adding %s" % (arcname,))
            self.write(fname, arcname)
1313
1314 def _get_codename(self, pathname, basename):
1315 """Return (filename, archivename) for the path.
1316
Fred Drake484d7352000-10-02 21:14:52 +00001317 Given a module name path, return the correct file path and
1318 archive name, compiling if necessary. For example, given
1319 /python/lib/string, return (/python/lib/string.pyc, string).
1320 """
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001321 file_py = pathname + ".py"
1322 file_pyc = pathname + ".pyc"
1323 file_pyo = pathname + ".pyo"
1324 if os.path.isfile(file_pyo) and \
Raymond Hettinger32200ae2002-06-01 19:51:15 +00001325 os.stat(file_pyo).st_mtime >= os.stat(file_py).st_mtime:
Tim Peterse1190062001-01-15 03:34:38 +00001326 fname = file_pyo # Use .pyo file
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001327 elif not os.path.isfile(file_pyc) or \
Raymond Hettinger32200ae2002-06-01 19:51:15 +00001328 os.stat(file_pyc).st_mtime < os.stat(file_py).st_mtime:
Fred Drake484d7352000-10-02 21:14:52 +00001329 import py_compile
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001330 if self.debug:
1331 print "Compiling", file_py
Martin v. Löwis0c6774d2003-01-15 11:51:06 +00001332 try:
1333 py_compile.compile(file_py, file_pyc, None, True)
1334 except py_compile.PyCompileError,err:
1335 print err.msg
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001336 fname = file_pyc
1337 else:
1338 fname = file_pyc
1339 archivename = os.path.split(fname)[1]
1340 if basename:
1341 archivename = "%s/%s" % (basename, archivename)
1342 return (fname, archivename)
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001343
1344
def main(args = None):
    """Command-line interface: list, test, extract or create a zipfile.

    "args" is the argument list without the program name; when None,
    sys.argv[1:] is used.  The first argument selects the action
    (-l, -t, -e or -c).  On a missing or unknown action, or a wrong
    number of arguments, the usage text is printed and sys.exit(1) is
    called.  The archive is closed on every path, including errors.
    """
    import textwrap
    USAGE=textwrap.dedent("""\
        Usage:
            zipfile.py -l zipfile.zip        # Show listing of a zipfile
            zipfile.py -t zipfile.zip        # Test if a zipfile is valid
            zipfile.py -e zipfile.zip target # Extract zipfile into target dir
            zipfile.py -c zipfile.zip src ... # Create zipfile from sources
        """)
    if args is None:
        args = sys.argv[1:]

    if not args or args[0] not in ('-l', '-c', '-e', '-t'):
        print(USAGE)
        sys.exit(1)

    if args[0] == '-l':
        if len(args) != 2:
            print(USAGE)
            sys.exit(1)
        zf = ZipFile(args[1], 'r')
        try:
            zf.printdir()
        finally:
            zf.close()

    elif args[0] == '-t':
        if len(args) != 2:
            print(USAGE)
            sys.exit(1)
        zf = ZipFile(args[1], 'r')
        try:
            badfile = zf.testzip()
        finally:
            # Previously the archive was left open in this branch.
            zf.close()
        if badfile:
            print("The following enclosed file is corrupted: {!r}".format(badfile))
        print("Done testing")

    elif args[0] == '-e':
        if len(args) != 3:
            print(USAGE)
            sys.exit(1)

        zf = ZipFile(args[1], 'r')
        try:
            out = args[2]
            # NOTE(review): member names are joined to the target as-is;
            # absolute names or names containing ".." can resolve outside
            # "out".  Only extract archives you trust.
            for path in zf.namelist():
                if path.startswith('./'):
                    tgt = os.path.join(out, path[2:])
                else:
                    tgt = os.path.join(out, path)

                if path.endswith('/'):
                    # Directory entry: create it instead of trying to
                    # open it as a file.
                    if not os.path.isdir(tgt):
                        os.makedirs(tgt)
                    continue

                tgtdir = os.path.dirname(tgt)
                # tgtdir is empty when extracting a top-level member
                # into an empty target; makedirs('') would fail.
                if tgtdir and not os.path.exists(tgtdir):
                    os.makedirs(tgtdir)
                with open(tgt, 'wb') as fp:
                    fp.write(zf.read(path))
        finally:
            zf.close()

    elif args[0] == '-c':
        if len(args) < 3:
            print(USAGE)
            sys.exit(1)

        def addToZip(zf, path, zippath):
            # Add a file, or recurse into a directory; anything else
            # (missing path, special file) is ignored.
            if os.path.isfile(path):
                zf.write(path, zippath, ZIP_DEFLATED)
            elif os.path.isdir(path):
                for nm in os.listdir(path):
                    addToZip(zf,
                            os.path.join(path, nm), os.path.join(zippath, nm))
            # else: ignore

        zf = ZipFile(args[1], 'w', allowZip64=True)
        try:
            for src in args[2:]:
                addToZip(zf, src, os.path.basename(src))
        finally:
            zf.close()
1418
# Run the command-line interface only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()