blob: 05c31438d7fdc150a2d322ee7ba82616183ce85a [file] [log] [blame]
"""
Read and write ZIP files.
"""
Georg Brandl62416bc2008-01-07 18:47:44 +00004import struct, os, time, sys, shutil
Martin v. Löwis0dfcfc82009-01-24 14:00:33 +00005import binascii, cStringIO, stat
Antoine Pitrou94c33eb2010-01-27 20:59:50 +00006import io
7import re
Guido van Rossum32abe6f2000-03-31 17:30:02 +00008
9try:
Tim Peterse1190062001-01-15 03:34:38 +000010 import zlib # We may need its compression method
Gregory P. Smithb89a0962008-03-19 01:46:10 +000011 crc32 = zlib.crc32
Guido van Rossum9c673f32001-04-10 15:37:12 +000012except ImportError:
Guido van Rossum32abe6f2000-03-31 17:30:02 +000013 zlib = None
Gregory P. Smithb89a0962008-03-19 01:46:10 +000014 crc32 = binascii.crc32
Guido van Rossum32abe6f2000-03-31 17:30:02 +000015
# Public names exported by ``from <module> import *``.
__all__ = [
    "BadZipfile", "error", "ZIP_STORED", "ZIP_DEFLATED", "is_zipfile",
    "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile",
]
Skip Montanaro40fc1602001-03-01 04:27:19 +000018
class BadZipfile(Exception):
    """Exception raised by this module for ZIP data it cannot process."""
Ronald Oussoren143cefb2006-06-15 08:14:18 +000021
22
class LargeZipFile(Exception):
    """
    Raised when writing a zipfile that requires ZIP64 extensions while
    those extensions are disabled.
    """
28
error = BadZipfile      # The exception raised by this module
Guido van Rossum32abe6f2000-03-31 17:30:02 +000030
# Per-file sizes above ZIP64_LIMIT make ZipInfo.FileHeader() fall back to
# the ZIP64 extra field.
ZIP64_LIMIT = (1 << 31) - 1
ZIP_FILECOUNT_LIMIT = 1 << 16
ZIP_MAX_COMMENT = (1 << 16) - 1

# constants for Zip file compression methods
ZIP_STORED = 0
ZIP_DEFLATED = 8
# Other ZIP compression methods not supported

# Below are some formats and associated data for reading/writing headers using
# the struct module.  The names and structures of headers/records are those used
# in the PKWARE description of the ZIP file format:
#     http://www.pkware.com/documents/casestudies/APPNOTE.TXT
# (URL valid as of January 2008)

# The "end of central directory" structure, magic number, size, and indices
# (section V.I in the format document)
structEndArchive = "<4s4H2LH"
stringEndArchive = "PK\005\006"
sizeEndCentDir = struct.calcsize(structEndArchive)

# Positions of the fields in an unpacked "end of central directory" record.
_ECD_SIGNATURE = 0
_ECD_DISK_NUMBER = 1
_ECD_DISK_START = 2
_ECD_ENTRIES_THIS_DISK = 3
_ECD_ENTRIES_TOTAL = 4
_ECD_SIZE = 5
_ECD_OFFSET = 6
_ECD_COMMENT_SIZE = 7
# These last two indices are not part of the structure as defined in the
# spec, but they are used internally by this module as a convenience
_ECD_COMMENT = 8
_ECD_LOCATION = 9

# The "central directory" structure, magic number, size, and indices
# of entries in the structure (section V.F in the format document)
structCentralDir = "<4s4B4HL2L5H2L"
stringCentralDir = "PK\001\002"
sizeCentralDir = struct.calcsize(structCentralDir)

# indexes of entries in the central directory structure
_CD_SIGNATURE = 0
_CD_CREATE_VERSION = 1
_CD_CREATE_SYSTEM = 2
_CD_EXTRACT_VERSION = 3
_CD_EXTRACT_SYSTEM = 4
_CD_FLAG_BITS = 5
_CD_COMPRESS_TYPE = 6
_CD_TIME = 7
_CD_DATE = 8
_CD_CRC = 9
_CD_COMPRESSED_SIZE = 10
_CD_UNCOMPRESSED_SIZE = 11
_CD_FILENAME_LENGTH = 12
_CD_EXTRA_FIELD_LENGTH = 13
_CD_COMMENT_LENGTH = 14
_CD_DISK_NUMBER_START = 15
_CD_INTERNAL_FILE_ATTRIBUTES = 16
_CD_EXTERNAL_FILE_ATTRIBUTES = 17
_CD_LOCAL_HEADER_OFFSET = 18

# The "local file header" structure, magic number, size, and indices
# (section V.A in the format document)
structFileHeader = "<4s2B4HL2L2H"
stringFileHeader = "PK\003\004"
sizeFileHeader = struct.calcsize(structFileHeader)

# Positions of the fields in an unpacked "local file header".
_FH_SIGNATURE = 0
_FH_EXTRACT_VERSION = 1
_FH_EXTRACT_SYSTEM = 2
_FH_GENERAL_PURPOSE_FLAG_BITS = 3
_FH_COMPRESSION_METHOD = 4
_FH_LAST_MOD_TIME = 5
_FH_LAST_MOD_DATE = 6
_FH_CRC = 7
_FH_COMPRESSED_SIZE = 8
_FH_UNCOMPRESSED_SIZE = 9
_FH_FILENAME_LENGTH = 10
_FH_EXTRA_FIELD_LENGTH = 11

# The "Zip64 end of central directory locator" structure, magic number, and size
structEndArchive64Locator = "<4sLQL"
stringEndArchive64Locator = "PK\x06\x07"
sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator)

# The "Zip64 end of central directory" record, magic number, size, and indices
# (section V.G in the format document)
structEndArchive64 = "<4sQ2H2L4Q"
stringEndArchive64 = "PK\x06\x06"
sizeEndCentDir64 = struct.calcsize(structEndArchive64)

# Positions of the fields in an unpacked "Zip64 end of central directory"
# record.
_CD64_SIGNATURE = 0
_CD64_DIRECTORY_RECSIZE = 1
_CD64_CREATE_VERSION = 2
_CD64_EXTRACT_VERSION = 3
_CD64_DISK_NUMBER = 4
_CD64_DISK_NUMBER_START = 5
_CD64_NUMBER_ENTRIES_THIS_DISK = 6
_CD64_NUMBER_ENTRIES_TOTAL = 7
_CD64_DIRECTORY_SIZE = 8
_CD64_OFFSET_START_CENTDIR = 9
132
def _check_zipfile(fp):
    """Return True if an "end of central directory" record is found in *fp*.

    An IOError raised while probing *fp* is swallowed and reported as False.
    """
    try:
        end_record = _EndRecData(fp)
    except IOError:
        return False
    if end_record:
        return True         # file has correct magic number
    return False
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000140
Antoine Pitrou6f193e02008-12-27 15:43:12 +0000141def is_zipfile(filename):
142 """Quickly see if a file is a ZIP file by checking the magic number.
143
144 The filename argument may be a file or file-like object too.
145 """
146 result = False
147 try:
148 if hasattr(filename, "read"):
149 result = _check_zipfile(fp=filename)
150 else:
151 with open(filename, "rb") as fp:
152 result = _check_zipfile(fp)
153 except IOError:
154 pass
155 return result
156
def _EndRecData64(fpin, offset, endrec):
    """
    Read the ZIP64 end-of-archive records and use that to update endrec

    fpin:   seekable file object positioned anywhere.
    offset: position of the regular "end of central directory" record,
            expressed relative to the end of the file (so it is negative).
    endrec: list describing the regular end record; it is updated in place
            and returned.

    If no ZIP64 locator/record can be read at the expected position,
    endrec is returned unchanged.  Raises BadZipfile for archives that span
    multiple disks.
    """
    try:
        fpin.seek(offset - sizeEndCentDir64Locator, 2)
    except IOError:
        # If the seek fails, the file is not large enough to contain a ZIP64
        # end-of-archive record, so just return the end record we were given.
        return endrec

    data = fpin.read(sizeEndCentDir64Locator)
    if len(data) != sizeEndCentDir64Locator:
        # Short read: there is no room for a locator, and unpacking a
        # truncated buffer would raise struct.error.
        return endrec
    sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, data)
    if sig != stringEndArchive64Locator:
        return endrec

    if diskno != 0 or disks != 1:
        raise BadZipfile("zipfiles that span multiple disks are not supported")

    # Assume no 'zip64 extensible data'
    fpin.seek(offset - sizeEndCentDir64Locator - sizeEndCentDir64, 2)
    data = fpin.read(sizeEndCentDir64)
    if len(data) != sizeEndCentDir64:
        # Same guard as above for the ZIP64 end record itself.
        return endrec
    sig, sz, create_version, read_version, disk_num, disk_dir, \
            dircount, dircount2, dirsize, diroffset = \
            struct.unpack(structEndArchive64, data)
    if sig != stringEndArchive64:
        return endrec

    # Update the original endrec using data from the ZIP64 record
    endrec[_ECD_SIGNATURE] = sig
    endrec[_ECD_DISK_NUMBER] = disk_num
    endrec[_ECD_DISK_START] = disk_dir
    endrec[_ECD_ENTRIES_THIS_DISK] = dircount
    endrec[_ECD_ENTRIES_TOTAL] = dircount2
    endrec[_ECD_SIZE] = dirsize
    endrec[_ECD_OFFSET] = diroffset
    return endrec
194
195
def _EndRecData(fpin):
    """Return data from the "End of Central Directory" record, or None.

    The data is a list of the nine items in the ZIP "End of central dir"
    record followed by a tenth item, the file seek offset of this record.

    None is returned when no complete record can be located."""

    # Determine file size
    fpin.seek(0, 2)
    filesize = fpin.tell()

    # Check to see if this is ZIP file with no archive comment (the
    # "end of central directory" structure should be the last item in the
    # file if this is the case).
    try:
        fpin.seek(-sizeEndCentDir, 2)
    except IOError:
        return None
    data = fpin.read()
    # The length check protects struct.unpack against file objects whose
    # seek() does not fail on files shorter than the record.
    if (len(data) == sizeEndCentDir and
            data[0:4] == stringEndArchive and
            data[-2:] == "\000\000"):
        # the signature is correct and there's no comment, unpack structure
        endrec = struct.unpack(structEndArchive, data)
        endrec = list(endrec)

        # Append a blank comment and record start offset
        endrec.append("")
        endrec.append(filesize - sizeEndCentDir)

        # Try to read the "Zip64 end of central directory" structure
        return _EndRecData64(fpin, -sizeEndCentDir, endrec)

    # Either this is not a ZIP file, or it is a ZIP file with an archive
    # comment.  Search the end of the file for the "end of central directory"
    # record signature. The comment is the last item in the ZIP file and may be
    # up to 64K long.  It is assumed that the "end of central directory" magic
    # number does not appear in the comment.
    maxCommentStart = max(filesize - (1 << 16) - sizeEndCentDir, 0)
    fpin.seek(maxCommentStart, 0)
    data = fpin.read()
    start = data.rfind(stringEndArchive)
    if start >= 0:
        # found the magic number; attempt to unpack and interpret
        recData = data[start:start+sizeEndCentDir]
        if len(recData) != sizeEndCentDir:
            # The signature sits too close to the end of the file for a
            # whole record to follow it: the file is corrupted.
            return None
        endrec = list(struct.unpack(structEndArchive, recData))
        commentSize = endrec[_ECD_COMMENT_SIZE] #as claimed by the zip file
        comment = data[start+sizeEndCentDir:start+sizeEndCentDir+commentSize]
        endrec.append(comment)
        endrec.append(maxCommentStart + start)

        # Try to read the "Zip64 end of central directory" structure
        return _EndRecData64(fpin, maxCommentStart + start - filesize,
                             endrec)

    # Unable to find a valid end of central directory structure
    return None
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000250
Fred Drake484d7352000-10-02 21:14:52 +0000251
class ZipInfo (object):
    """Class with attributes describing each file in the ZIP archive."""

    __slots__ = (
            'orig_filename',
            'filename',
            'date_time',
            'compress_type',
            'comment',
            'extra',
            'create_system',
            'create_version',
            'extract_version',
            'reserved',
            'flag_bits',
            'volume',
            'internal_attr',
            'external_attr',
            'header_offset',
            'CRC',
            'compress_size',
            'file_size',
            '_raw_time',
        )

    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
        """Create the description of one archive member.

        filename:  member name; it is cut at the first null byte and
                   os.sep is replaced by "/".
        date_time: (year, month, day, hour, min, sec) tuple.

        Raises ValueError if the year is before 1980.
        """
        self.orig_filename = filename   # Original file name in archive

        # Terminate the file name at the first null byte.  Null bytes in file
        # names are used as tricks by viruses in archives.
        null_byte = filename.find(chr(0))
        if null_byte >= 0:
            filename = filename[0:null_byte]
        # This is used to ensure paths in generated ZIP files always use
        # forward slashes as the directory separator, as required by the
        # ZIP format specification.
        if os.sep != "/" and os.sep in filename:
            filename = filename.replace(os.sep, "/")

        self.filename = filename        # Normalized file name
        self.date_time = date_time      # year, month, day, hour, min, sec

        if date_time[0] < 1980:
            raise ValueError('ZIP does not support timestamps before 1980')

        # Standard values:
        self.compress_type = ZIP_STORED # Type of compression for the file
        self.comment = ""               # Comment for each file
        self.extra = ""                 # ZIP extra data
        if sys.platform == 'win32':
            self.create_system = 0          # System which created ZIP archive
        else:
            # Assume everything else is unix-y
            self.create_system = 3          # System which created ZIP archive
        self.create_version = 20        # Version which created ZIP archive
        self.extract_version = 20       # Version needed to extract archive
        self.reserved = 0               # Must be zero
        self.flag_bits = 0              # ZIP flag bits
        self.volume = 0                 # Volume number of file header
        self.internal_attr = 0          # Internal attributes
        self.external_attr = 0          # External file attributes
        # Other attributes are set by class ZipFile:
        # header_offset         Byte offset to the file header
        # CRC                   CRC-32 of the uncompressed file
        # compress_size         Size of the compressed file
        # file_size             Size of the uncompressed file

    def FileHeader(self):
        """Return the per-file header as a string.

        When either size exceeds ZIP64_LIMIT a ZIP64 extra field is appended
        and extract_version/create_version are raised to at least 45 on this
        object as a side effect.
        """
        dt = self.date_time
        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
        if self.flag_bits & 0x08:
            # Set these to zero because we write them after the file data
            CRC = compress_size = file_size = 0
        else:
            CRC = self.CRC
            compress_size = self.compress_size
            file_size = self.file_size

        extra = self.extra

        if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
            # File is larger than what fits into a 4 byte integer,
            # fall back to the ZIP64 extension
            fmt = '<HHQQ'
            extra = extra + struct.pack(fmt,
                    1, struct.calcsize(fmt)-4, file_size, compress_size)
            file_size = 0xffffffff
            compress_size = 0xffffffff
            self.extract_version = max(45, self.extract_version)
            # Raise create_version from its own current value; deriving it
            # from extract_version could lower a higher create_version.
            self.create_version = max(45, self.create_version)

        filename, flag_bits = self._encodeFilenameFlags()
        header = struct.pack(structFileHeader, stringFileHeader,
                 self.extract_version, self.reserved, flag_bits,
                 self.compress_type, dostime, dosdate, CRC,
                 compress_size, file_size,
                 len(filename), len(extra))
        return header + filename + extra

    def _encodeFilenameFlags(self):
        """Return (encoded filename, flag bits) for writing a header.

        A unicode name is stored as ASCII when possible; otherwise it is
        stored as UTF-8 and bit 0x800 is added to the returned flags.
        """
        if isinstance(self.filename, unicode):
            try:
                return self.filename.encode('ascii'), self.flag_bits
            except UnicodeEncodeError:
                return self.filename.encode('utf-8'), self.flag_bits | 0x800
        else:
            return self.filename, self.flag_bits

    def _decodeFilename(self):
        """Return the filename, decoded from UTF-8 if flag bit 0x800 is set."""
        if self.flag_bits & 0x800:
            return self.filename.decode('utf-8')
        else:
            return self.filename

    def _decodeExtra(self):
        """Apply the ZIP64 values found in the extra field to this object.

        Fields whose stored value is the 0xffffffff placeholder (file_size,
        compress_size, header_offset, in that order) are replaced by the
        64-bit values of the ZIP64 record (type 1).  Other record types are
        skipped.  Raises RuntimeError for a ZIP64 record of unexpected
        length.
        """
        # Try to decode the extra field.
        extra = self.extra
        unpack = struct.unpack
        # Every record starts with a 4-byte (type, length) header; stop as
        # soon as fewer bytes remain so trailing padding cannot make
        # struct.unpack fail.
        while len(extra) >= 4:
            tp, ln = unpack('<HH', extra[:4])
            if tp == 1:
                if ln >= 24:
                    counts = unpack('<QQQ', extra[4:28])
                elif ln == 16:
                    counts = unpack('<QQ', extra[4:20])
                elif ln == 8:
                    counts = unpack('<Q', extra[4:12])
                elif ln == 0:
                    counts = ()
                else:
                    raise RuntimeError("Corrupt extra field %s" % (ln,))

                idx = 0

                # ZIP64 extension (large files and/or large archives)
                if self.file_size in (0xffffffffffffffff, 0xffffffff):
                    self.file_size = counts[idx]
                    idx += 1

                if self.compress_size == 0xFFFFFFFF:
                    self.compress_size = counts[idx]
                    idx += 1

                if self.header_offset == 0xffffffff:
                    self.header_offset = counts[idx]
                    idx += 1

            extra = extra[ln+4:]
Tim Petersa608bb22006-06-15 18:06:29 +0000403
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000404
class _ZipDecrypter:
    """Class to handle decryption of files stored within a ZIP archive.

    ZIP supports a password-based form of encryption. Even though known
    plaintext attacks have been found against it, it is still useful
    to be able to get data out of such a file.

    Usage:
        zd = _ZipDecrypter(mypwd)
        plain_char = zd(cypher_char)
        plain_text = map(zd, cypher_text)
    """

    def _GenerateCRCTable():
        """Generate a CRC-32 table.

        ZIP encryption uses the CRC32 one-byte primitive for scrambling some
        internal keys. We noticed that a direct implementation is faster than
        relying on binascii.crc32().
        """
        poly = 0xedb88320
        table = []
        for byte in range(256):
            value = byte
            for _ in range(8):
                low_bit = value & 1
                value = (value >> 1) & 0x7FFFFFFF
                if low_bit:
                    value ^= poly
            table.append(value)
        return table
    # Built once, while the class body executes, and shared by all instances.
    crctable = _GenerateCRCTable()

    def _crc32(self, ch, crc):
        """Compute the CRC32 primitive on one byte."""
        index = (crc ^ ord(ch)) & 0xff
        shifted = (crc >> 8) & 0xffffff
        return shifted ^ self.crctable[index]

    def __init__(self, pwd):
        """Set the three keys to their start values, then mix in *pwd*."""
        self.key0 = 305419896       # 0x12345678
        self.key1 = 591751049       # 0x23456789
        self.key2 = 878082192       # 0x34567890
        for pwd_char in pwd:
            self._UpdateKeys(pwd_char)

    def _UpdateKeys(self, c):
        """Advance the three keys using the plaintext character *c*."""
        self.key0 = self._crc32(c, self.key0)
        summed = (self.key1 + (self.key0 & 255)) & 4294967295
        self.key1 = (summed * 134775813 + 1) & 4294967295
        top_byte = chr((self.key1 >> 24) & 255)
        self.key2 = self._crc32(top_byte, self.key2)

    def __call__(self, c):
        """Decrypt a single character."""
        k = self.key2 | 2
        keystream = ((k * (k ^ 1)) >> 8) & 255
        plain = chr(ord(c) ^ keystream)
        # The keys are advanced with the decrypted character.
        self._UpdateKeys(plain)
        return plain
463
class ZipExtFile(io.BufferedIOBase):
    """File-like object for reading an archive member.
       Is returned by ZipFile.open().
    """

    # Max size supported by decompressor.
    # (Parenthesised on purpose: "1 << 31 - 1" would evaluate to 1 << 30.)
    MAX_N = (1 << 31) - 1

    # Read from compressed files in 4k blocks.
    MIN_READ_SIZE = 4096

    # Search for universal newlines or line chunks.
    PATTERN = re.compile(r'^(?P<chunk>[^\r\n]+)|(?P<newline>\n|\r\n?)')

    def __init__(self, fileobj, mode, zipinfo, decrypter=None):
        """Prepare to read one archive member.

        fileobj:   object whose read() supplies the member's raw bytes.
        mode:      mode string; universal newline handling is enabled when
                   it contains 'U'.
        zipinfo:   object providing compress_type, compress_size, filename
                   and, optionally, CRC.
        decrypter: optional callable applied to every raw character before
                   decompression.
        """
        self._fileobj = fileobj
        self._decrypter = decrypter

        self._compress_type = zipinfo.compress_type
        self._compress_size = zipinfo.compress_size
        self._compress_left = zipinfo.compress_size

        if self._compress_type == ZIP_DEFLATED:
            self._decompressor = zlib.decompressobj(-15)
        self._unconsumed = ''

        self._readbuffer = ''
        self._offset = 0

        self._universal = 'U' in mode
        self.newlines = None

        # Adjust read size for encrypted files since the first 12 bytes
        # are for the encryption/password information.
        if self._decrypter is not None:
            self._compress_left -= 12

        self.mode = mode
        self.name = zipinfo.filename

        if hasattr(zipinfo, 'CRC'):
            self._expected_crc = zipinfo.CRC
            self._running_crc = crc32(b'') & 0xffffffff
        else:
            self._expected_crc = None

    def readline(self, limit=-1):
        """Read and return a line from the stream.

        If limit is specified, at most limit bytes will be read.
        """

        if not self._universal and limit < 0:
            # Shortcut common case - newline found in buffer.
            i = self._readbuffer.find('\n', self._offset) + 1
            if i > 0:
                line = self._readbuffer[self._offset: i]
                self._offset = i
                return line

        if not self._universal:
            return io.BufferedIOBase.readline(self, limit)

        line = ''
        while limit < 0 or len(line) < limit:
            readahead = self.peek(2)
            if readahead == '':
                return line

            #
            # Search for universal newlines or line chunks.
            #
            # The pattern returns either a line chunk or a newline, but not
            # both. Combined with peek(2), we are assured that the sequence
            # '\r\n' is always retrieved completely and never split into
            # separate newlines - '\r', '\n' due to coincidental readaheads.
            #
            match = self.PATTERN.search(readahead)
            newline = match.group('newline')
            if newline is not None:
                if self.newlines is None:
                    self.newlines = []
                if newline not in self.newlines:
                    self.newlines.append(newline)
                self._offset += len(newline)
                return line + '\n'

            chunk = match.group('chunk')
            if limit >= 0:
                chunk = chunk[: limit - len(line)]

            self._offset += len(chunk)
            line += chunk

        return line

    def peek(self, n=1):
        """Returns buffered bytes without advancing the position."""
        if n > len(self._readbuffer) - self._offset:
            chunk = self.read(n)
            if len(chunk) > self._offset:
                # read() needed several read1() calls and the buffer was
                # rebuilt in between, so part of chunk is no longer in it.
                # Put the chunk back in front of the unread remainder
                # instead of letting the offset go negative.
                self._readbuffer = chunk + self._readbuffer[self._offset:]
                self._offset = 0
            else:
                self._offset -= len(chunk)

        # Return up to 512 bytes to reduce allocation overhead for tight loops.
        return self._readbuffer[self._offset: self._offset + 512]

    def readable(self):
        """Return True: the object is always readable."""
        return True

    def read(self, n=-1):
        """Read and return up to n bytes.
        If the argument is omitted, None, or negative, data is read and returned until EOF is reached..
        """
        buf = ''
        if n is None:
            n = -1
        while True:
            if n < 0:
                data = self.read1(n)
            elif n > len(buf):
                data = self.read1(n - len(buf))
            else:
                return buf
            if len(data) == 0:
                return buf
            buf += data

    def _update_crc(self, newdata, eof):
        """Fold newdata into the running CRC and verify it once eof is true.

        Raises BadZipfile when the final CRC differs from the expected one.
        """
        # Update the CRC using the given data.
        if self._expected_crc is None:
            # No need to compute the CRC if we don't have a reference value
            return
        self._running_crc = crc32(newdata, self._running_crc) & 0xffffffff
        # Check the CRC if we're at the end of the file
        if eof and self._running_crc != self._expected_crc:
            raise BadZipfile("Bad CRC-32 for file %r" % self.name)

    def read1(self, n):
        """Read up to n bytes with at most one read() system call."""

        # Simplify algorithm (branching) by transforming negative n to large n.
        # None is tested first so that it is never compared with an integer.
        if n is None or n < 0:
            n = self.MAX_N

        # Bytes available in read buffer.
        len_readbuffer = len(self._readbuffer) - self._offset

        # Read from file.
        if self._compress_left > 0 and n > len_readbuffer + len(self._unconsumed):
            nbytes = n - len_readbuffer - len(self._unconsumed)
            nbytes = max(nbytes, self.MIN_READ_SIZE)
            nbytes = min(nbytes, self._compress_left)

            data = self._fileobj.read(nbytes)
            self._compress_left -= len(data)

            if data and self._decrypter is not None:
                data = ''.join(map(self._decrypter, data))

            if self._compress_type == ZIP_STORED:
                self._update_crc(data, eof=(self._compress_left==0))
                self._readbuffer = self._readbuffer[self._offset:] + data
                self._offset = 0
            else:
                # Prepare deflated bytes for decompression.
                self._unconsumed += data

        # Handle unconsumed data.
        if (len(self._unconsumed) > 0 and n > len_readbuffer and
            self._compress_type == ZIP_DEFLATED):
            data = self._decompressor.decompress(
                self._unconsumed,
                max(n - len_readbuffer, self.MIN_READ_SIZE)
            )

            self._unconsumed = self._decompressor.unconsumed_tail
            eof = len(self._unconsumed) == 0 and self._compress_left == 0
            if eof:
                data += self._decompressor.flush()

            self._update_crc(data, eof=eof)
            self._readbuffer = self._readbuffer[self._offset:] + data
            self._offset = 0

        # Read from buffer.
        data = self._readbuffer[self._offset: self._offset + n]
        self._offset += len(data)
        return data
651
Tim Petersea5962f2007-03-12 18:07:52 +0000652
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000653
class ZipFile(object):
    """ Class with methods to open, read, write, close, list zip files.

    z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=False)

    file: Either the path to the file, or a file-like object.
          If it is a path, the file will be opened and closed by ZipFile.
    mode: The mode can be either read "r", write "w" or append "a".
    compression: ZIP_STORED (no compression) or ZIP_DEFLATED (requires zlib).
    allowZip64: if True ZipFile will create files with ZIP64 extensions when
                needed, otherwise it will raise an exception when this would
                be necessary.

    """

    fp = None                   # Set here since __del__ checks it

    def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=False):
        """Open the ZIP file with mode read "r", write "w" or append "a".

        Raises RuntimeError for an unknown mode or compression method, or
        when ZIP_DEFLATED is requested but zlib could not be imported.
        """
        if mode not in ("r", "w", "a"):
            raise RuntimeError('ZipFile() requires mode "r", "w", or "a"')

        if compression == ZIP_DEFLATED:
            if not zlib:
                raise RuntimeError(
                    "Compression requires the (missing) zlib module")
        elif compression != ZIP_STORED:
            raise RuntimeError("That compression method is not supported")

        self._allowZip64 = allowZip64
        self._didModify = False
        self.debug = 0                  # Level of printing: 0 through 3
        self.NameToInfo = {}            # Find file info given name
        self.filelist = []              # List of ZipInfo instances for archive
        self.compression = compression  # Method of compression
        self.mode = key = mode.replace('b', '')[0]
        self.pwd = None
        self._comment = ''

        # Check if we were passed a file-like object
        if isinstance(file, basestring):
            self._filePassed = 0
            self.filename = file
            modeDict = {'r': 'rb', 'w': 'wb', 'a': 'r+b'}
            try:
                self.fp = open(file, modeDict[mode])
            except IOError:
                if mode != 'a':
                    raise
                # Appending to a file that cannot be opened for update:
                # fall back to creating it.
                mode = key = 'w'
                self.fp = open(file, modeDict[mode])
        else:
            self._filePassed = 1
            self.fp = file
            self.filename = getattr(file, 'name', None)

        if key == 'r':
            self._GetContents()
        elif key == 'w':
            # set the modified flag so central directory gets written
            # even if no files are added to the archive
            self._didModify = True
        elif key == 'a':
            try:
                # See if file is a zip file
                self._RealGetContents()
                # seek to start of directory and overwrite
                self.fp.seek(self.start_dir, 0)
            except BadZipfile:
                # file is not a zip file, just append
                self.fp.seek(0, 2)

                # set the modified flag so central directory gets written
                # even if no files are added to the archive
                self._didModify = True
        else:
            # Defensive: the mode check at the top should make this
            # branch unreachable.
            if not self._filePassed:
                self.fp.close()
                self.fp = None
            raise RuntimeError('Mode must be "r", "w" or "a"')

    def __enter__(self):
        """Return the archive itself for use in a "with" statement."""
        return self

    def __exit__(self, type, value, traceback):
        """Close the archive when the "with" block ends."""
        self.close()

    def _GetContents(self):
        """Read the directory, making sure we close the file if the format
        is bad."""
        try:
            self._RealGetContents()
        except BadZipfile:
            if not self._filePassed:
                self.fp.close()
                self.fp = None
            raise

    def _RealGetContents(self):
        """Read in the table of contents for the ZIP file.

        Fills self.filelist and self.NameToInfo and records
        self.start_dir and the archive comment.  Raises BadZipfile when no
        end-of-archive record is found or a central directory entry is
        malformed or truncated.
        """
        fp = self.fp
        try:
            endrec = _EndRecData(fp)
        except IOError:
            raise BadZipfile("File is not a zip file")
        if not endrec:
            raise BadZipfile("File is not a zip file")
        if self.debug > 1:
            print(endrec)
        size_cd = endrec[_ECD_SIZE]             # bytes in central directory
        offset_cd = endrec[_ECD_OFFSET]         # offset of central directory
        self._comment = endrec[_ECD_COMMENT]    # archive comment

        # "concat" is zero, unless zip was concatenated to another file
        concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
        if endrec[_ECD_SIGNATURE] == stringEndArchive64:
            # If Zip64 extension structures are present, account for them
            concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)

        if self.debug > 2:
            inferred = concat + offset_cd
            print("given, inferred, offset %s %s %s"
                  % (offset_cd, inferred, concat))
        # self.start_dir:  Position of start of central directory
        self.start_dir = offset_cd + concat
        fp.seek(self.start_dir, 0)
        data = fp.read(size_cd)
        fp = cStringIO.StringIO(data)
        total = 0
        while total < size_cd:
            centdir = fp.read(sizeCentralDir)
            if centdir[0:4] != stringCentralDir:
                raise BadZipfile("Bad magic number for central directory")
            if len(centdir) != sizeCentralDir:
                # Right signature but short record: report it as a broken
                # archive instead of letting struct.error escape.
                raise BadZipfile("Truncated central directory")
            centdir = struct.unpack(structCentralDir, centdir)
            if self.debug > 2:
                print(centdir)
            filename = fp.read(centdir[_CD_FILENAME_LENGTH])
            # Create ZipInfo instance to store file information
            x = ZipInfo(filename)
            x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
            x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
            x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
            (x.create_version, x.create_system, x.extract_version, x.reserved,
             x.flag_bits, x.compress_type, t, d,
             x.CRC, x.compress_size, x.file_size) = centdir[1:12]
            x.volume, x.internal_attr, x.external_attr = centdir[15:18]
            # Convert date/time code to (year, month, day, hour, min, sec)
            x._raw_time = t
            x.date_time = ((d >> 9) + 1980, (d >> 5) & 0xF, d & 0x1F,
                           t >> 11, (t >> 5) & 0x3F, (t & 0x1F) * 2)

            x._decodeExtra()
            x.header_offset = x.header_offset + concat
            x.filename = x._decodeFilename()
            self.filelist.append(x)
            self.NameToInfo[x.filename] = x

            # update total bytes read from central directory
            total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH]
                     + centdir[_CD_EXTRA_FIELD_LENGTH]
                     + centdir[_CD_COMMENT_LENGTH])

            if self.debug > 2:
                print("total %s" % total)

    def namelist(self):
        """Return a list of file names in the archive."""
        return [data.filename for data in self.filelist]

    def infolist(self):
        """Return a list of class ZipInfo instances for files in the
        archive."""
        return self.filelist

    def printdir(self):
        """Print a table of contents for the zip file."""
        print("%-46s %19s %12s" % ("File Name", "Modified    ", "Size"))
        for zinfo in self.filelist:
            date = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time[:6]
            print("%-46s %s %12d" % (zinfo.filename, date, zinfo.file_size))

    def testzip(self):
        """Read all the files and check the CRC.

        Return the name of the first member whose read raises BadZipfile,
        or None when every member reads cleanly.
        """
        chunk_size = 2 ** 20
        for zinfo in self.filelist:
            try:
                # Read by chunks, to avoid an OverflowError or a
                # MemoryError with very large embedded files.
                f = self.open(zinfo.filename, "r")
                try:
                    while f.read(chunk_size):     # Check CRC-32
                        pass
                finally:
                    f.close()
            except BadZipfile:
                return zinfo.filename

    def getinfo(self, name):
        """Return the instance of ZipInfo given 'name'.

        Raises KeyError when the archive has no member with that name.
        """
        info = self.NameToInfo.get(name)
        if info is None:
            raise KeyError(
                'There is no item named %r in the archive' % name)

        return info

    def setpassword(self, pwd):
        """Set default password for encrypted files."""
        self.pwd = pwd

    @property
    def comment(self):
        """The comment text associated with the ZIP file."""
        return self._comment

    @comment.setter
    def comment(self, comment):
        # Only a comment longer than the limit is truncated; one of exactly
        # ZIP_MAX_COMMENT bytes fits and must not be reported as too long.
        if len(comment) > ZIP_MAX_COMMENT:
            if self.debug:
                print('Archive comment is too long; truncating to %d bytes'
                      % ZIP_MAX_COMMENT)
            comment = comment[:ZIP_MAX_COMMENT]
        self._comment = comment
        self._didModify = True

    def read(self, name, pwd=None):
        """Return file bytes (as a string) for name."""
        return self.open(name, "r", pwd).read()

    def open(self, name, mode="r", pwd=None):
        """Return file-like object for 'name'.

        'name' may be a member name or a ZipInfo instance.  'pwd' overrides
        the default password set with setpassword().  Raises RuntimeError
        for a bad mode, a closed archive, or a missing/incorrect password,
        and BadZipfile when the local file header is malformed.
        """
        if mode not in ("r", "U", "rU"):
            raise RuntimeError('open() requires mode "r", "U", or "rU"')
        if not self.fp:
            raise RuntimeError(
                "Attempt to read ZIP archive that was already closed")

        # Only open a new file for instances where we were not
        # given a file object in the constructor
        if self._filePassed:
            zef_file = self.fp
            owns_handle = False
        else:
            zef_file = open(self.filename, 'rb')
            owns_handle = True

        handed_off = False
        try:
            # Make sure we have an info object
            if isinstance(name, ZipInfo):
                # 'name' is already an info object
                zinfo = name
            else:
                # Get info object for name
                zinfo = self.getinfo(name)

            zef_file.seek(zinfo.header_offset, 0)

            # Skip the file header:
            fheader = zef_file.read(sizeFileHeader)
            if fheader[0:4] != stringFileHeader:
                raise BadZipfile("Bad magic number for file header")
            if len(fheader) != sizeFileHeader:
                # Avoid leaking struct.error for a short header.
                raise BadZipfile("Truncated file header")

            fheader = struct.unpack(structFileHeader, fheader)
            fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
            if fheader[_FH_EXTRA_FIELD_LENGTH]:
                zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])

            if fname != zinfo.orig_filename:
                raise BadZipfile(
                    'File name in directory "%s" and header "%s" differ.' % (
                        zinfo.orig_filename, fname))

            # check for encrypted flag & handle password
            is_encrypted = zinfo.flag_bits & 0x1
            zd = None
            if is_encrypted:
                if not pwd:
                    pwd = self.pwd
                if not pwd:
                    raise RuntimeError("File %s is encrypted, "
                                       "password required for extraction"
                                       % name)

                zd = _ZipDecrypter(pwd)
                # The first 12 bytes in the cypher stream is an encryption
                # header used to strengthen the algorithm. The first 11
                # bytes are completely random, while the 12th contains the
                # MSB of the CRC, or the MSB of the file time depending on
                # the header type and is used to check the correctness of
                # the password.
                crypt_header = zef_file.read(12)
                h = [zd(c) for c in crypt_header[0:12]]
                if zinfo.flag_bits & 0x8:
                    # compare against the file type from extended local
                    # headers
                    check_byte = (zinfo._raw_time >> 8) & 0xff
                else:
                    # compare against the CRC otherwise
                    check_byte = (zinfo.CRC >> 24) & 0xff
                if ord(h[11]) != check_byte:
                    raise RuntimeError("Bad password for file", name)

            member = ZipExtFile(zef_file, mode, zinfo, zd)
            handed_off = True
            return member
        finally:
            # If we opened our own handle and failed before handing it to
            # the ZipExtFile, close it here so it is not leaked.
            if owns_handle and not handed_off:
                zef_file.close()

    def extract(self, member, path=None, pwd=None):
        """Extract a member from the archive to the current working directory,
           using its full name. Its file information is extracted as accurately
           as possible. `member' may be a filename or a ZipInfo object. You can
           specify a different directory using `path'.
        """
        if not isinstance(member, ZipInfo):
            member = self.getinfo(member)

        if path is None:
            path = os.getcwd()

        return self._extract_member(member, path, pwd)

    def extractall(self, path=None, members=None, pwd=None):
        """Extract all members from the archive to the current working
           directory. `path' specifies a different directory to extract to.
           `members' is optional and must be a subset of the list returned
           by namelist().
        """
        if members is None:
            members = self.namelist()

        for zipinfo in members:
            self.extract(zipinfo, path, pwd)

    def _extract_member(self, member, targetpath, pwd):
        """Extract the ZipInfo object 'member' to a physical
           file on the path targetpath.

           Member names come from the archive and are untrusted: absolute
           paths, drive letters and "." / ".." components are stripped so
           the result always lands underneath targetpath.  Returns the path
           that was written.
        """
        # Strip trailing path separator, unless it represents the root.
        if (targetpath[-1:] in (os.path.sep, os.path.altsep)
                and len(os.path.splitdrive(targetpath)[1]) > 1):
            targetpath = targetpath[:-1]

        # build the destination pathname, replacing
        # forward slashes to platform specific separators.
        arcname = member.filename.replace('/', os.path.sep)
        if os.path.altsep:
            arcname = arcname.replace(os.path.altsep, os.path.sep)
        # Interpret an absolute name as relative; drop the drive letter,
        # redundant separators and "." / ".." components so a crafted
        # archive cannot write outside targetpath.
        arcname = os.path.splitdrive(arcname)[1]
        arcname = os.path.sep.join(
            part for part in arcname.split(os.path.sep)
            if part not in ('', os.path.curdir, os.path.pardir))

        targetpath = os.path.join(targetpath, arcname)
        targetpath = os.path.normpath(targetpath)

        # Create all upper directories if necessary.
        upperdirs = os.path.dirname(targetpath)
        if upperdirs and not os.path.exists(upperdirs):
            os.makedirs(upperdirs)

        if member.filename.endswith('/'):
            # Directory entry: nothing to copy.
            if not os.path.isdir(targetpath):
                os.mkdir(targetpath)
            return targetpath

        source = self.open(member, pwd=pwd)
        try:
            target = open(targetpath, "wb")
            try:
                shutil.copyfileobj(source, target)
            finally:
                target.close()
        finally:
            source.close()

        return targetpath

    def _writecheck(self, zinfo):
        """Check for errors before writing a file to the archive.

        Raises RuntimeError for a wrong mode, closed archive or unsupported
        compression, and LargeZipFile when ZIP64 would be needed but is not
        allowed.  A duplicate name is only reported when debugging.
        """
        if zinfo.filename in self.NameToInfo:
            if self.debug:      # Warning for duplicate names
                print("Duplicate name: %s" % zinfo.filename)
        if self.mode not in ("w", "a"):
            raise RuntimeError('write() requires mode "w" or "a"')
        if not self.fp:
            raise RuntimeError(
                "Attempt to write ZIP archive that was already closed")
        if zinfo.compress_type == ZIP_DEFLATED and not zlib:
            raise RuntimeError(
                "Compression requires the (missing) zlib module")
        if zinfo.compress_type not in (ZIP_STORED, ZIP_DEFLATED):
            raise RuntimeError(
                "That compression method is not supported")
        if zinfo.file_size > ZIP64_LIMIT:
            if not self._allowZip64:
                raise LargeZipFile("Filesize would require ZIP64 extensions")
        if zinfo.header_offset > ZIP64_LIMIT:
            if not self._allowZip64:
                raise LargeZipFile(
                    "Zipfile size would require ZIP64 extensions")

    def write(self, filename, arcname=None, compress_type=None):
        """Put the bytes from filename into the archive under the name
        arcname."""
        if not self.fp:
            raise RuntimeError(
                "Attempt to write to ZIP archive that was already closed")

        st = os.stat(filename)
        isdir = stat.S_ISDIR(st.st_mode)
        mtime = time.localtime(st.st_mtime)
        date_time = mtime[0:6]
        # Create ZipInfo instance to store file information
        if arcname is None:
            arcname = filename
        arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
        while arcname[0] in (os.sep, os.altsep):
            arcname = arcname[1:]
        if isdir:
            arcname += '/'
        zinfo = ZipInfo(arcname, date_time)
        zinfo.external_attr = (st[0] & 0xFFFF) << 16      # Unix attributes
        if compress_type is None:
            zinfo.compress_type = self.compression
        else:
            zinfo.compress_type = compress_type

        zinfo.file_size = st.st_size
        zinfo.flag_bits = 0x00
        zinfo.header_offset = self.fp.tell()    # Start of header bytes

        self._writecheck(zinfo)
        self._didModify = True

        if isdir:
            zinfo.file_size = 0
            zinfo.compress_size = 0
            zinfo.CRC = 0
            self.filelist.append(zinfo)
            self.NameToInfo[zinfo.filename] = zinfo
            self.fp.write(zinfo.FileHeader())
            return

        with open(filename, "rb") as src:
            # Must overwrite CRC and sizes with correct data later
            zinfo.CRC = CRC = 0
            zinfo.compress_size = compress_size = 0
            zinfo.file_size = file_size = 0
            self.fp.write(zinfo.FileHeader())
            if zinfo.compress_type == ZIP_DEFLATED:
                cmpr = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                                        zlib.DEFLATED, -15)
            else:
                cmpr = None
            while 1:
                buf = src.read(1024 * 8)
                if not buf:
                    break
                file_size = file_size + len(buf)
                CRC = crc32(buf, CRC) & 0xffffffff
                if cmpr:
                    buf = cmpr.compress(buf)
                    compress_size = compress_size + len(buf)
                self.fp.write(buf)
        if cmpr:
            buf = cmpr.flush()
            compress_size = compress_size + len(buf)
            self.fp.write(buf)
            zinfo.compress_size = compress_size
        else:
            zinfo.compress_size = file_size
        zinfo.CRC = CRC
        zinfo.file_size = file_size
        # Seek backwards and write CRC and file sizes
        position = self.fp.tell()       # Preserve current position in file
        self.fp.seek(zinfo.header_offset + 14, 0)
        self.fp.write(struct.pack("<LLL", zinfo.CRC, zinfo.compress_size,
                                  zinfo.file_size))
        self.fp.seek(position, 0)
        self.filelist.append(zinfo)
        self.NameToInfo[zinfo.filename] = zinfo

    def writestr(self, zinfo_or_arcname, bytes, compress_type=None):
        """Write a file into the archive.  The contents is the string
        'bytes'.  'zinfo_or_arcname' is either a ZipInfo instance or
        the name of the file in the archive."""
        if not isinstance(zinfo_or_arcname, ZipInfo):
            zinfo = ZipInfo(filename=zinfo_or_arcname,
                            date_time=time.localtime(time.time())[:6])

            zinfo.compress_type = self.compression
            zinfo.external_attr = 0o600 << 16
        else:
            zinfo = zinfo_or_arcname

        if not self.fp:
            raise RuntimeError(
                "Attempt to write to ZIP archive that was already closed")

        if compress_type is not None:
            zinfo.compress_type = compress_type

        zinfo.file_size = len(bytes)            # Uncompressed size
        zinfo.header_offset = self.fp.tell()    # Start of header bytes
        self._writecheck(zinfo)
        self._didModify = True
        zinfo.CRC = crc32(bytes) & 0xffffffff       # CRC-32 checksum
        if zinfo.compress_type == ZIP_DEFLATED:
            co = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                                  zlib.DEFLATED, -15)
            bytes = co.compress(bytes) + co.flush()
            zinfo.compress_size = len(bytes)    # Compressed size
        else:
            zinfo.compress_size = zinfo.file_size
        self.fp.write(zinfo.FileHeader())
        self.fp.write(bytes)
        self.fp.flush()
        if zinfo.flag_bits & 0x08:
            # Write CRC and file sizes after the file data
            self.fp.write(struct.pack("<LLL", zinfo.CRC, zinfo.compress_size,
                                      zinfo.file_size))
        self.filelist.append(zinfo)
        self.NameToInfo[zinfo.filename] = zinfo

    def __del__(self):
        """Call the "close()" method in case the user forgot."""
        self.close()

    def close(self):
        """Close the file, and for mode "w" and "a" write the ending
        records.

        The file handle is released (when ZipFile opened it) and self.fp is
        cleared even if writing the ending records fails, so a later call,
        including the one from __del__, does not retry the failed write.
        """
        if self.fp is None:
            return

        try:
            if self.mode in ("w", "a") and self._didModify:
                self._write_end_records()
        finally:
            fp = self.fp
            self.fp = None
            if not self._filePassed:
                fp.close()

    def _write_end_records(self):
        """Write the central directory and the end-of-archive record(s)."""
        count = 0
        pos1 = self.fp.tell()
        for zinfo in self.filelist:         # write central directory
            count = count + 1
            dt = zinfo.date_time
            dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
            dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
            extra = []
            if zinfo.file_size > ZIP64_LIMIT \
                    or zinfo.compress_size > ZIP64_LIMIT:
                extra.append(zinfo.file_size)
                extra.append(zinfo.compress_size)
                file_size = 0xffffffff
                compress_size = 0xffffffff
            else:
                file_size = zinfo.file_size
                compress_size = zinfo.compress_size

            if zinfo.header_offset > ZIP64_LIMIT:
                extra.append(zinfo.header_offset)
                header_offset = 0xffffffff
            else:
                header_offset = zinfo.header_offset

            extra_data = zinfo.extra
            if extra:
                # Append a ZIP64 field to the extra's
                extra_data = struct.pack(
                    '<HH' + 'Q' * len(extra),
                    1, 8 * len(extra), *extra) + extra_data

                extract_version = max(45, zinfo.extract_version)
                create_version = max(45, zinfo.create_version)
            else:
                extract_version = zinfo.extract_version
                create_version = zinfo.create_version

            try:
                filename, flag_bits = zinfo._encodeFilenameFlags()
                centdir = struct.pack(
                    structCentralDir,
                    stringCentralDir, create_version,
                    zinfo.create_system, extract_version, zinfo.reserved,
                    flag_bits, zinfo.compress_type, dostime, dosdate,
                    zinfo.CRC, compress_size, file_size,
                    len(filename), len(extra_data), len(zinfo.comment),
                    0, zinfo.internal_attr, zinfo.external_attr,
                    header_offset)
            except DeprecationWarning:
                # Dump the values that were being packed, then re-raise.
                sys.stderr.write("%s\n" % ((
                    structCentralDir,
                    stringCentralDir, create_version,
                    zinfo.create_system, extract_version, zinfo.reserved,
                    zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
                    zinfo.CRC, compress_size, file_size,
                    len(zinfo.filename), len(extra_data), len(zinfo.comment),
                    0, zinfo.internal_attr, zinfo.external_attr,
                    header_offset),))
                raise
            self.fp.write(centdir)
            self.fp.write(filename)
            self.fp.write(extra_data)
            self.fp.write(zinfo.comment)

        pos2 = self.fp.tell()
        # Write end-of-zip-archive record
        centDirCount = count
        centDirSize = pos2 - pos1
        centDirOffset = pos1
        if (centDirCount >= ZIP_FILECOUNT_LIMIT or
                centDirOffset > ZIP64_LIMIT or
                centDirSize > ZIP64_LIMIT):
            # Need to write the ZIP64 end-of-archive records
            zip64endrec = struct.pack(
                structEndArchive64, stringEndArchive64,
                44, 45, 45, 0, 0, centDirCount, centDirCount,
                centDirSize, centDirOffset)
            self.fp.write(zip64endrec)

            zip64locrec = struct.pack(
                structEndArchive64Locator,
                stringEndArchive64Locator, 0, pos2, 1)
            self.fp.write(zip64locrec)
            # Clamp the values stored in the classic end record to the
            # largest value each field can hold.
            centDirCount = min(centDirCount, 0xFFFF)
            centDirSize = min(centDirSize, 0xFFFFFFFF)
            centDirOffset = min(centDirOffset, 0xFFFFFFFF)

        endrec = struct.pack(structEndArchive, stringEndArchive,
                             0, 0, centDirCount, centDirCount,
                             centDirSize, centDirOffset, len(self._comment))
        self.fp.write(endrec)
        self.fp.write(self._comment)
        self.fp.flush()
1272
1273
class PyZipFile(ZipFile):
    """Class to create ZIP archives with Python library files and packages."""

    def writepy(self, pathname, basename=""):
        """Add all files from "pathname" to the ZIP archive.

        If pathname is a package directory, search the directory and
        all package subdirectories recursively for all *.py and enter
        the modules into the archive.  If pathname is a plain
        directory, listdir *.py and enter all modules.  Else, pathname
        must be a Python *.py file and the module will be put into the
        archive.  Added modules are always module.pyo or module.pyc.
        This method will compile the module.py into module.pyc if
        necessary.

        basename is the archive directory prefix under which the modules
        are stored; an empty string stores them at the top level.

        Raises RuntimeError if pathname is a file that does not end
        with ".py".
        """
        if not os.path.isdir(pathname):
            # A single module file.
            if not pathname.endswith(".py"):
                raise RuntimeError(
                    'Files added with writepy() must end with ".py"')
            self._write_module(pathname[:-3], basename, "Adding file")
            return

        initname = os.path.join(pathname, "__init__.py")
        if os.path.isfile(initname):
            # This is a package directory, add it under its own name.
            name = os.path.split(pathname)[1]
            if basename:
                basename = "%s/%s" % (basename, name)
            else:
                basename = name
            if self.debug:
                print("Adding package in %s as %s" % (pathname, basename))
            self._write_module(initname[:-3], basename)
            dirlist = os.listdir(pathname)
            # __init__.py has just been added; do not add it twice.
            dirlist.remove("__init__.py")
            # Add all *.py files and package subdirectories
            for filename in dirlist:
                path = os.path.join(pathname, filename)
                if os.path.isdir(path):
                    # Only sub-directories that are packages are descended
                    # into; other directories are ignored.
                    if os.path.isfile(os.path.join(path, "__init__.py")):
                        self.writepy(path, basename)  # Recursive call
                elif os.path.splitext(filename)[1] == ".py":
                    self._write_module(path[:-3], basename)
        else:
            # This is NOT a package directory, add its files at top level
            if self.debug:
                print("Adding files from directory %s" % (pathname,))
            for filename in os.listdir(pathname):
                if os.path.splitext(filename)[1] == ".py":
                    path = os.path.join(pathname, filename)
                    self._write_module(path[:-3], basename)

    def _write_module(self, modpath, basename, label="Adding"):
        """Write the compiled file for module path "modpath" to the archive.

        modpath is the path of the module without its ".py" extension and
        basename the archive directory prefix.  label is the leading word(s)
        of the message printed when self.debug is set.
        """
        fname, arcname = self._get_codename(modpath, basename)
        if self.debug:
            print("%s %s" % (label, arcname))
        self.write(fname, arcname)

    def _get_codename(self, pathname, basename):
        """Return (filename, archivename) for the path.

        Given a module name path, return the correct file path and
        archive name, compiling if necessary.  For example, given
        /python/lib/string, return (/python/lib/string.pyc, string.pyc).
        If basename is non-empty, the archive name is prefixed with
        "basename/".

        An up-to-date .pyo file is preferred; otherwise the .pyc file is
        used and is (re)compiled first when it is missing or older than
        the .py source.  A compile error is printed, not raised.
        """
        file_py = pathname + ".py"
        file_pyc = pathname + ".pyc"
        file_pyo = pathname + ".pyo"
        if os.path.isfile(file_pyo) and \
                os.stat(file_pyo).st_mtime >= os.stat(file_py).st_mtime:
            fname = file_pyo    # Use .pyo file
        else:
            fname = file_pyc
            if not os.path.isfile(file_pyc) or \
                    os.stat(file_pyc).st_mtime < os.stat(file_py).st_mtime:
                import py_compile
                if self.debug:
                    print("Compiling %s" % (file_py,))
                try:
                    py_compile.compile(file_py, file_pyc, None, True)
                except py_compile.PyCompileError as err:
                    # Best effort: report the problem and carry on.
                    print(err.msg)
        archivename = os.path.split(fname)[1]
        if basename:
            archivename = "%s/%s" % (basename, archivename)
        return (fname, archivename)
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001371
1372
def main(args=None):
    """Run the simple command-line interface of this module.

    args is the list of command-line arguments without the program name;
    when it is None, sys.argv[1:] is used.  The first argument selects
    the action:

        -l zipfile.zip           list the contents of the archive
        -t zipfile.zip           test the archive for corrupted members
        -e zipfile.zip target    extract the archive into directory target
        -c zipfile.zip src ...   create an archive from files/directories

    On a usage error the usage text is printed and SystemExit(1) is
    raised.  The archive is always closed, even when an operation fails.
    """
    import textwrap
    USAGE = textwrap.dedent("""\
        Usage:
            zipfile.py -l zipfile.zip        # Show listing of a zipfile
            zipfile.py -t zipfile.zip        # Test if a zipfile is valid
            zipfile.py -e zipfile.zip target # Extract zipfile into target dir
            zipfile.py -c zipfile.zip src ... # Create zipfile from sources
        """)

    def usage_error():
        # Print the usage text and terminate with exit status 1.
        print(USAGE)
        sys.exit(1)

    if args is None:
        args = sys.argv[1:]

    if not args or args[0] not in ('-l', '-c', '-e', '-t'):
        usage_error()

    command = args[0]

    if command == '-l':
        if len(args) != 2:
            usage_error()
        zf = ZipFile(args[1], 'r')
        try:
            zf.printdir()
        finally:
            zf.close()

    elif command == '-t':
        if len(args) != 2:
            usage_error()
        zf = ZipFile(args[1], 'r')
        try:
            badfile = zf.testzip()
        finally:
            zf.close()
        if badfile:
            print("The following enclosed file is corrupted: {!r}".format(badfile))
        print("Done testing")

    elif command == '-e':
        if len(args) != 3:
            usage_error()

        out = args[2]
        # Absolute target directory with a trailing separator, used to
        # check that every extracted path stays inside it.
        out_root = os.path.join(os.path.abspath(out), '')
        zf = ZipFile(args[1], 'r')
        try:
            for path in zf.namelist():
                if path.startswith('./'):
                    tgt = os.path.join(out, path[2:])
                else:
                    tgt = os.path.join(out, path)

                # Member names come from the (possibly untrusted) archive:
                # an absolute name or one containing ".." must not be
                # allowed to write outside the target directory.
                tgt_abs = os.path.join(os.path.abspath(tgt), '')
                if not tgt_abs.startswith(out_root):
                    print("Skipping unsafe member path: {!r}".format(path))
                    continue

                tgtdir = os.path.dirname(tgt)
                # tgtdir is empty when tgt has no directory component;
                # os.makedirs('') would raise.
                if tgtdir and not os.path.exists(tgtdir):
                    os.makedirs(tgtdir)
                if path.endswith('/'):
                    # Directory entry: it was created above and there is
                    # no file data to write.
                    continue
                with open(tgt, 'wb') as fp:
                    fp.write(zf.read(path))
        finally:
            zf.close()

    elif command == '-c':
        if len(args) < 3:
            usage_error()

        def addToZip(zf, path, zippath):
            # Add a file, or recursively the contents of a directory, to
            # the archive; anything else is ignored.
            if os.path.isfile(path):
                zf.write(path, zippath, ZIP_DEFLATED)
            elif os.path.isdir(path):
                for nm in os.listdir(path):
                    addToZip(zf,
                             os.path.join(path, nm),
                             os.path.join(zippath, nm))
            # else: ignore

        zf = ZipFile(args[1], 'w', allowZip64=True)
        try:
            for src in args[2:]:
                addToZip(zf, src, os.path.basename(src))
        finally:
            zf.close()
1446
# Run the command-line interface only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()