blob: 94e4e0f059c8ed708b276c143a04d6229c149fbf [file] [log] [blame]
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001"""
2Read and write ZIP files.
3"""
Georg Brandl62416bc2008-01-07 18:47:44 +00004import struct, os, time, sys, shutil
Martin v. Löwis0dfcfc82009-01-24 14:00:33 +00005import binascii, cStringIO, stat
Antoine Pitrou94c33eb2010-01-27 20:59:50 +00006import io
7import re
Guido van Rossum32abe6f2000-03-31 17:30:02 +00008
try:
    import zlib  # We may need its compression method
    crc32 = zlib.crc32
except ImportError:
    # zlib is optional.  Without it ZIP_DEFLATED cannot be used (ZipFile
    # refuses that compression method) and CRC-32 values are computed with
    # binascii instead.
    zlib = None
    crc32 = binascii.crc32

# Names exported by "from <this module> import *".
__all__ = ["BadZipfile", "error", "ZIP_STORED", "ZIP_DEFLATED", "is_zipfile",
           "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile" ]
Skip Montanaro40fc1602001-03-01 04:27:19 +000018
class BadZipfile(Exception):
    """Error raised by this module for malformed or unsupported ZIP data.

    Examples visible in this file are a CRC-32 mismatch while reading a
    member and an archive that spans multiple disks.
    """
    pass
Ronald Oussoren143cefb2006-06-15 08:14:18 +000021
22
class LargeZipFile(Exception):
    """
    Raised when writing a zipfile that requires ZIP64 extensions while
    those extensions are disabled.
    """
28
error = BadZipfile      # The exception raised by this module

# Sizes above ZIP64_LIMIT are written using the ZIP64 extra field (see
# ZipInfo.FileHeader).
ZIP64_LIMIT = (1 << 31) - 1
# NOTE(review): presumably the entry-count threshold for requiring ZIP64;
# its use site is not visible in this part of the file -- confirm.
ZIP_FILECOUNT_LIMIT = 1 << 16
# Largest value that fits the 16-bit comment-size field of the "end of
# central directory" record.
ZIP_MAX_COMMENT = (1 << 16) - 1

# constants for Zip file compression methods
ZIP_STORED = 0
ZIP_DEFLATED = 8
# Other ZIP compression methods not supported
39
# Below are some formats and associated data for reading/writing headers using
# the struct module.  The names and structures of headers/records are those used
# in the PKWARE description of the ZIP file format:
#     http://www.pkware.com/documents/casestudies/APPNOTE.TXT
# (URL valid as of January 2008)

# The "end of central directory" structure, magic number, size, and indices
# (section V.I in the format document)
# Little-endian layout: 4-byte signature, four 16-bit fields, two 32-bit
# fields and one 16-bit field (22 bytes, 8 unpacked items).
structEndArchive = "<4s4H2LH"
stringEndArchive = "PK\005\006"
sizeEndCentDir = struct.calcsize(structEndArchive)

# Positions of the items in the list produced by unpacking structEndArchive.
_ECD_SIGNATURE = 0
_ECD_DISK_NUMBER = 1
_ECD_DISK_START = 2
_ECD_ENTRIES_THIS_DISK = 3
_ECD_ENTRIES_TOTAL = 4
_ECD_SIZE = 5
_ECD_OFFSET = 6
_ECD_COMMENT_SIZE = 7
# These last two indices are not part of the structure as defined in the
# spec, but they are used internally by this module as a convenience
# (_EndRecData appends the comment and the record's file offset).
_ECD_COMMENT = 8
_ECD_LOCATION = 9
64
# The "central directory" structure, magic number, size, and indices
# of entries in the structure (section V.F in the format document)
# Little-endian layout: 4-byte signature, four bytes, four 16-bit fields,
# three 32-bit fields, five 16-bit fields and two 32-bit fields
# (46 bytes, 19 unpacked items).
structCentralDir = "<4s4B4HL2L5H2L"
stringCentralDir = "PK\001\002"
sizeCentralDir = struct.calcsize(structCentralDir)

# indexes of entries in the central directory structure
_CD_SIGNATURE = 0
_CD_CREATE_VERSION = 1
_CD_CREATE_SYSTEM = 2
_CD_EXTRACT_VERSION = 3
_CD_EXTRACT_SYSTEM = 4
_CD_FLAG_BITS = 5
_CD_COMPRESS_TYPE = 6
_CD_TIME = 7
_CD_DATE = 8
_CD_CRC = 9
_CD_COMPRESSED_SIZE = 10
_CD_UNCOMPRESSED_SIZE = 11
_CD_FILENAME_LENGTH = 12
_CD_EXTRA_FIELD_LENGTH = 13
_CD_COMMENT_LENGTH = 14
_CD_DISK_NUMBER_START = 15
_CD_INTERNAL_FILE_ATTRIBUTES = 16
_CD_EXTERNAL_FILE_ATTRIBUTES = 17
_CD_LOCAL_HEADER_OFFSET = 18
91
# The "local file header" structure, magic number, size, and indices
# (section V.A in the format document)
# Little-endian layout: 4-byte signature, two bytes, four 16-bit fields,
# three 32-bit fields and two 16-bit fields (30 bytes, 12 unpacked items).
structFileHeader = "<4s2B4HL2L2H"
stringFileHeader = "PK\003\004"
sizeFileHeader = struct.calcsize(structFileHeader)

# Positions of the items produced by unpacking structFileHeader.
_FH_SIGNATURE = 0
_FH_EXTRACT_VERSION = 1
_FH_EXTRACT_SYSTEM = 2
_FH_GENERAL_PURPOSE_FLAG_BITS = 3
_FH_COMPRESSION_METHOD = 4
_FH_LAST_MOD_TIME = 5
_FH_LAST_MOD_DATE = 6
_FH_CRC = 7
_FH_COMPRESSED_SIZE = 8
_FH_UNCOMPRESSED_SIZE = 9
_FH_FILENAME_LENGTH = 10
_FH_EXTRA_FIELD_LENGTH = 11
110
# The "Zip64 end of central directory locator" structure, magic number, and size
# Little-endian layout: 4-byte signature, 32-bit field, 64-bit field,
# 32-bit field (20 bytes).
structEndArchive64Locator = "<4sLQL"
stringEndArchive64Locator = "PK\x06\x07"
sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator)

# The "Zip64 end of central directory" record, magic number, size, and indices
# (section V.G in the format document)
# Little-endian layout: 4-byte signature, 64-bit field, two 16-bit fields,
# two 32-bit fields and four 64-bit fields (56 bytes, 10 unpacked items).
structEndArchive64 = "<4sQ2H2L4Q"
stringEndArchive64 = "PK\x06\x06"
sizeEndCentDir64 = struct.calcsize(structEndArchive64)

# Positions of the items produced by unpacking structEndArchive64.
_CD64_SIGNATURE = 0
_CD64_DIRECTORY_RECSIZE = 1
_CD64_CREATE_VERSION = 2
_CD64_EXTRACT_VERSION = 3
_CD64_DISK_NUMBER = 4
_CD64_DISK_NUMBER_START = 5
_CD64_NUMBER_ENTRIES_THIS_DISK = 6
_CD64_NUMBER_ENTRIES_TOTAL = 7
_CD64_DIRECTORY_SIZE = 8
_CD64_OFFSET_START_CENTDIR = 9
132
def _check_zipfile(fp):
    """Return True if an "end of central directory" record is found in fp.

    fp is an open, seekable binary file object.  An IOError raised while
    probing the file is treated as "not a ZIP file" and yields False.
    """
    try:
        # _EndRecData returns a non-empty list when the magic number is found.
        looks_like_zip = bool(_EndRecData(fp))
    except IOError:
        looks_like_zip = False
    return looks_like_zip
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000140
def is_zipfile(filename):
    """Quickly see if a file is a ZIP file by checking the magic number.

    The filename argument may be a path, or an already open file or
    file-like object (anything with a "read" attribute).  Returns False
    rather than raising when the file cannot be opened or read.
    """
    outcome = False
    try:
        if not hasattr(filename, "read"):
            # A path: open it ourselves and make sure it is closed again.
            with open(filename, "rb") as stream:
                outcome = _check_zipfile(stream)
        else:
            # Already a file-like object: probe it directly.
            outcome = _check_zipfile(fp=filename)
    except IOError:
        pass
    return outcome
156
def _EndRecData64(fpin, offset, endrec):
    """
    Read the ZIP64 end-of-archive records and use that to update endrec

    fpin is the open archive, offset is the (negative) position of the
    regular "end of central directory" record relative to the end of the
    file, and endrec is the list built by _EndRecData.

    Returns endrec, updated in place with the ZIP64 values when a ZIP64
    locator and record are found, and untouched otherwise.  Raises
    BadZipfile for archives that span multiple disks.
    """
    try:
        fpin.seek(offset - sizeEndCentDir64Locator, 2)
    except IOError:
        # If the seek fails, the file is not large enough to contain a ZIP64
        # end-of-archive record, so just return the end record we were given.
        return endrec

    data = fpin.read(sizeEndCentDir64Locator)
    if len(data) != sizeEndCentDir64Locator:
        # Short read: there is no room for a locator, so this is not ZIP64.
        # (Unpacking a short buffer would raise struct.error.)
        return endrec
    sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, data)
    if sig != stringEndArchive64Locator:
        return endrec

    if diskno != 0 or disks != 1:
        raise BadZipfile("zipfiles that span multiple disks are not supported")

    # Assume no 'zip64 extensible data'
    fpin.seek(offset - sizeEndCentDir64Locator - sizeEndCentDir64, 2)
    data = fpin.read(sizeEndCentDir64)
    if len(data) != sizeEndCentDir64:
        # Truncated ZIP64 record; keep the values from the regular record.
        return endrec
    sig, sz, create_version, read_version, disk_num, disk_dir, \
            dircount, dircount2, dirsize, diroffset = \
            struct.unpack(structEndArchive64, data)
    if sig != stringEndArchive64:
        return endrec

    # Update the original endrec using data from the ZIP64 record
    endrec[_ECD_SIGNATURE] = sig
    endrec[_ECD_DISK_NUMBER] = disk_num
    endrec[_ECD_DISK_START] = disk_dir
    endrec[_ECD_ENTRIES_THIS_DISK] = dircount
    endrec[_ECD_ENTRIES_TOTAL] = dircount2
    endrec[_ECD_SIZE] = dirsize
    endrec[_ECD_OFFSET] = diroffset
    return endrec
194
195
def _EndRecData(fpin):
    """Return data from the "End of Central Directory" record, or None.

    The data is a list of the nine items in the ZIP "End of central dir"
    record followed by a tenth item, the file seek offset of this record.

    fpin must be a seekable binary file object.  None is returned when no
    complete record can be found (not a ZIP file, or a truncated one)."""

    # Determine file size
    fpin.seek(0, 2)
    filesize = fpin.tell()

    # Check to see if this is ZIP file with no archive comment (the
    # "end of central directory" structure should be the last item in the
    # file if this is the case).
    try:
        fpin.seek(-sizeEndCentDir, 2)
    except IOError:
        return None
    data = fpin.read()
    if (len(data) == sizeEndCentDir and
            data[0:4] == stringEndArchive and
            data[-2:] == "\000\000"):
        # the signature is correct and there's no comment, unpack structure
        endrec = struct.unpack(structEndArchive, data)
        endrec = list(endrec)

        # Append a blank comment and record start offset
        endrec.append("")
        endrec.append(filesize - sizeEndCentDir)

        # Try to read the "Zip64 end of central directory" structure
        return _EndRecData64(fpin, -sizeEndCentDir, endrec)

    # Either this is not a ZIP file, or it is a ZIP file with an archive
    # comment.  Search the end of the file for the "end of central directory"
    # record signature. The comment is the last item in the ZIP file and may be
    # up to 64K long.  It is assumed that the "end of central directory" magic
    # number does not appear in the comment.
    maxCommentStart = max(filesize - (1 << 16) - sizeEndCentDir, 0)
    fpin.seek(maxCommentStart, 0)
    data = fpin.read()
    start = data.rfind(stringEndArchive)
    if start >= 0:
        # found the magic number; attempt to unpack and interpret
        recData = data[start:start+sizeEndCentDir]
        if len(recData) != sizeEndCentDir:
            # The signature sits too close to the end of the file for a
            # complete record: the archive is truncated or corrupt.
            return None
        endrec = list(struct.unpack(structEndArchive, recData))
        commentSize = endrec[_ECD_COMMENT_SIZE] #as claimed by the zip file
        comment = data[start+sizeEndCentDir:start+sizeEndCentDir+commentSize]
        endrec.append(comment)
        endrec.append(maxCommentStart + start)

        # Try to read the "Zip64 end of central directory" structure
        return _EndRecData64(fpin, maxCommentStart + start - filesize,
                             endrec)

    # Unable to find a valid end of central directory structure
    return None
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000250
Fred Drake484d7352000-10-02 21:14:52 +0000251
class ZipInfo (object):
    """Class with attributes describing each file in the ZIP archive."""

    __slots__ = (
            'orig_filename',
            'filename',
            'date_time',
            'compress_type',
            'comment',
            'extra',
            'create_system',
            'create_version',
            'extract_version',
            'reserved',
            'flag_bits',
            'volume',
            'internal_attr',
            'external_attr',
            'header_offset',
            'CRC',
            'compress_size',
            'file_size',
            '_raw_time',
        )

    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
        """Create the description of one archive member.

        filename is the member name; it is cut at the first null byte and
        os.sep is converted to "/".  date_time is a sequence
        (year, month, day, hour, min, sec).

        Raises ValueError if the year is before 1980.
        """
        self.orig_filename = filename   # Original file name in archive

        # Terminate the file name at the first null byte.  Null bytes in file
        # names are used as tricks by viruses in archives.
        null_byte = filename.find(chr(0))
        if null_byte >= 0:
            filename = filename[0:null_byte]
        # This is used to ensure paths in generated ZIP files always use
        # forward slashes as the directory separator, as required by the
        # ZIP format specification.
        if os.sep != "/" and os.sep in filename:
            filename = filename.replace(os.sep, "/")

        self.filename = filename        # Normalized file name
        self.date_time = date_time      # year, month, day, hour, min, sec

        if date_time[0] < 1980:
            raise ValueError('ZIP does not support timestamps before 1980')

        # Standard values:
        self.compress_type = ZIP_STORED # Type of compression for the file
        self.comment = ""               # Comment for each file
        self.extra = ""                 # ZIP extra data
        if sys.platform == 'win32':
            self.create_system = 0          # System which created ZIP archive
        else:
            # Assume everything else is unix-y
            self.create_system = 3          # System which created ZIP archive
        self.create_version = 20        # Version which created ZIP archive
        self.extract_version = 20       # Version needed to extract archive
        self.reserved = 0               # Must be zero
        self.flag_bits = 0              # ZIP flag bits
        self.volume = 0                 # Volume number of file header
        self.internal_attr = 0          # Internal attributes
        self.external_attr = 0          # External file attributes
        # Other attributes are set by class ZipFile:
        # header_offset         Byte offset to the file header
        # CRC                   CRC-32 of the uncompressed file
        # compress_size         Size of the compressed file
        # file_size             Size of the uncompressed file

    def FileHeader(self):
        """Return the per-file header as a string.

        The result is the packed local file header followed by the
        (possibly encoded) file name and the extra field.  When a size
        exceeds ZIP64_LIMIT a ZIP64 extra record is appended and
        extract_version/create_version are raised to at least 45.
        """
        dt = self.date_time
        # Pack the timestamp into the MS-DOS date and time bit fields
        # (seconds are stored with a two second resolution).
        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
        if self.flag_bits & 0x08:
            # Set these to zero because we write them after the file data
            CRC = compress_size = file_size = 0
        else:
            CRC = self.CRC
            compress_size = self.compress_size
            file_size = self.file_size

        extra = self.extra

        if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
            # File is larger than what fits into a 4 byte integer,
            # fall back to the ZIP64 extension
            fmt = '<HHQQ'
            extra = extra + struct.pack(fmt,
                    1, struct.calcsize(fmt)-4, file_size, compress_size)
            # 0xffffffff in the header tells readers to use the extra record.
            file_size = 0xffffffff
            compress_size = 0xffffffff
            self.extract_version = max(45, self.extract_version)
            # NOTE(review): create_version is derived from extract_version
            # here, not from its own previous value -- confirm this is intended.
            self.create_version = max(45, self.extract_version)

        filename, flag_bits = self._encodeFilenameFlags()
        header = struct.pack(structFileHeader, stringFileHeader,
                 self.extract_version, self.reserved, flag_bits,
                 self.compress_type, dostime, dosdate, CRC,
                 compress_size, file_size,
                 len(filename), len(extra))
        return header + filename + extra

    def _encodeFilenameFlags(self):
        """Return (encoded file name, flag bits) for writing a header.

        Unicode names are stored as ASCII when possible, otherwise as UTF-8
        with flag bit 0x800 set.  Byte-string names are passed through.
        """
        if isinstance(self.filename, unicode):
            try:
                return self.filename.encode('ascii'), self.flag_bits
            except UnicodeEncodeError:
                return self.filename.encode('utf-8'), self.flag_bits | 0x800
        else:
            return self.filename, self.flag_bits

    def _decodeFilename(self):
        """Return the file name, decoded from UTF-8 if flag bit 0x800 is set."""
        if self.flag_bits & 0x800:
            return self.filename.decode('utf-8')
        else:
            return self.filename

    def _decodeExtra(self):
        """Apply the ZIP64 record of the extra field to this entry.

        The extra field is a sequence of records, each a 4-byte header
        (16-bit type, 16-bit length) followed by its data.  For a type 1
        (ZIP64) record, file_size, compress_size and header_offset are
        replaced, in that order, when they hold the 0xffffffff placeholder.

        Raises RuntimeError if a ZIP64 record has an unexpected length.
        """
        # Try to decode the extra field.
        extra = self.extra
        unpack = struct.unpack
        # Require a complete 4-byte record header; trailing bytes that cannot
        # form one are ignored rather than raising struct.error.
        while len(extra) >= 4:
            tp, ln = unpack('<HH', extra[:4])
            if tp == 1:
                if ln >= 24:
                    counts = unpack('<QQQ', extra[4:28])
                elif ln == 16:
                    counts = unpack('<QQ', extra[4:20])
                elif ln == 8:
                    counts = unpack('<Q', extra[4:12])
                elif ln == 0:
                    counts = ()
                else:
                    raise RuntimeError("Corrupt extra field %s" % (ln,))

                idx = 0

                # ZIP64 extension (large files and/or large archives)
                if self.file_size in (0xffffffffffffffff, 0xffffffff):
                    self.file_size = counts[idx]
                    idx += 1

                if self.compress_size == 0xFFFFFFFF:
                    self.compress_size = counts[idx]
                    idx += 1

                if self.header_offset == 0xffffffff:
                    self.header_offset = counts[idx]
                    idx += 1

            # Skip this record's header and data.
            extra = extra[ln+4:]
Tim Petersa608bb22006-06-15 18:06:29 +0000403
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000404
Martin v. Löwisc6d626e2007-02-13 09:49:38 +0000405class _ZipDecrypter:
406 """Class to handle decryption of files stored within a ZIP archive.
407
408 ZIP supports a password-based form of encryption. Even though known
409 plaintext attacks have been found against it, it is still useful
Gregory P. Smithda407232008-01-20 01:32:00 +0000410 to be able to get data out of such a file.
Martin v. Löwisc6d626e2007-02-13 09:49:38 +0000411
412 Usage:
413 zd = _ZipDecrypter(mypwd)
414 plain_char = zd(cypher_char)
415 plain_text = map(zd, cypher_text)
416 """
417
418 def _GenerateCRCTable():
419 """Generate a CRC-32 table.
420
421 ZIP encryption uses the CRC32 one-byte primitive for scrambling some
422 internal keys. We noticed that a direct implementation is faster than
423 relying on binascii.crc32().
424 """
425 poly = 0xedb88320
426 table = [0] * 256
427 for i in range(256):
428 crc = i
429 for j in range(8):
430 if crc & 1:
431 crc = ((crc >> 1) & 0x7FFFFFFF) ^ poly
432 else:
433 crc = ((crc >> 1) & 0x7FFFFFFF)
434 table[i] = crc
435 return table
436 crctable = _GenerateCRCTable()
437
438 def _crc32(self, ch, crc):
439 """Compute the CRC32 primitive on one byte."""
440 return ((crc >> 8) & 0xffffff) ^ self.crctable[(crc ^ ord(ch)) & 0xff]
441
442 def __init__(self, pwd):
443 self.key0 = 305419896
444 self.key1 = 591751049
445 self.key2 = 878082192
446 for p in pwd:
447 self._UpdateKeys(p)
448
449 def _UpdateKeys(self, c):
450 self.key0 = self._crc32(c, self.key0)
451 self.key1 = (self.key1 + (self.key0 & 255)) & 4294967295
452 self.key1 = (self.key1 * 134775813 + 1) & 4294967295
453 self.key2 = self._crc32(chr((self.key1 >> 24) & 255), self.key2)
454
455 def __call__(self, c):
456 """Decrypt a single character."""
457 c = ord(c)
458 k = self.key2 | 2
459 c = c ^ (((k * (k^1)) >> 8) & 255)
460 c = chr(c)
461 self._UpdateKeys(c)
462 return c
463
class ZipExtFile(io.BufferedIOBase):
    """File-like object for reading an archive member.
       Is returned by ZipFile.open().

    Data is read from the underlying file object in blocks, optionally
    decrypted and inflated, and kept in an internal read buffer.  The
    CRC-32 of the delivered data is checked when the end of the member is
    reached.
    """

    # Max size supported by decompressor.
    # (Parenthesised: "1 << 31 - 1" would evaluate to 1 << 30.)
    MAX_N = (1 << 31) - 1

    # Read from compressed files in 4k blocks.
    MIN_READ_SIZE = 4096

    # Search for universal newlines or line chunks.
    PATTERN = re.compile(r'^(?P<chunk>[^\r\n]+)|(?P<newline>\n|\r\n?)')

    def __init__(self, fileobj, mode, zipinfo, decrypter=None,
            close_fileobj=False):
        """Wrap fileobj, positioned at the start of the member's data.

        mode is the mode string; a 'U' in it enables universal newlines in
        readline().  zipinfo describes the member.  decrypter, if given, is
        a callable that decrypts one character at a time.  If close_fileobj
        is true, fileobj is closed together with this object.
        """
        self._fileobj = fileobj
        self._decrypter = decrypter
        self._close_fileobj = close_fileobj

        self._compress_type = zipinfo.compress_type
        self._compress_size = zipinfo.compress_size
        # Number of compressed bytes still to be read from fileobj.
        self._compress_left = zipinfo.compress_size

        if self._compress_type == ZIP_DEFLATED:
            # Negative window size: raw deflate data without zlib header.
            self._decompressor = zlib.decompressobj(-15)
        # Compressed bytes read but not yet fed through the decompressor.
        self._unconsumed = ''

        # Decoded data not yet handed out; _offset is the read position in it.
        self._readbuffer = ''
        self._offset = 0

        self._universal = 'U' in mode
        self.newlines = None

        # Adjust read size for encrypted files since the first 12 bytes
        # are for the encryption/password information.
        if self._decrypter is not None:
            self._compress_left -= 12

        self.mode = mode
        self.name = zipinfo.filename

        if hasattr(zipinfo, 'CRC'):
            self._expected_crc = zipinfo.CRC
            self._running_crc = crc32(b'') & 0xffffffff
        else:
            self._expected_crc = None

    def readline(self, limit=-1):
        """Read and return a line from the stream.

        If limit is specified, at most limit bytes will be read.
        In universal newline mode every line ending is returned as '\\n'
        and the kinds seen are recorded in self.newlines.
        """

        if not self._universal and limit < 0:
            # Shortcut common case - newline found in buffer.
            i = self._readbuffer.find('\n', self._offset) + 1
            if i > 0:
                line = self._readbuffer[self._offset: i]
                self._offset = i
                return line

        if not self._universal:
            return io.BufferedIOBase.readline(self, limit)

        line = ''
        while limit < 0 or len(line) < limit:
            readahead = self.peek(2)
            if readahead == '':
                return line

            #
            # Search for universal newlines or line chunks.
            #
            # The pattern returns either a line chunk or a newline, but not
            # both. Combined with peek(2), we are assured that the sequence
            # '\r\n' is always retrieved completely and never split into
            # separate newlines - '\r', '\n' due to coincidental readaheads.
            #
            match = self.PATTERN.search(readahead)
            newline = match.group('newline')
            if newline is not None:
                if self.newlines is None:
                    self.newlines = []
                if newline not in self.newlines:
                    self.newlines.append(newline)
                self._offset += len(newline)
                return line + '\n'

            chunk = match.group('chunk')
            if limit >= 0:
                chunk = chunk[: limit - len(line)]

            self._offset += len(chunk)
            line += chunk

        return line

    def peek(self, n=1):
        """Returns buffered bytes without advancing the position."""
        if n > len(self._readbuffer) - self._offset:
            # Not enough buffered: read more, then rewind over what was read.
            chunk = self.read(n)
            self._offset -= len(chunk)

        # Return up to 512 bytes to reduce allocation overhead for tight loops.
        return self._readbuffer[self._offset: self._offset + 512]

    def readable(self):
        """Return True: the object supports reading."""
        return True

    def read(self, n=-1):
        """Read and return up to n bytes.
        If the argument is omitted, None, or negative, data is read and returned until EOF is reached.
        """
        buf = ''
        if n is None:
            n = -1
        while True:
            if n < 0:
                data = self.read1(n)
            elif n > len(buf):
                data = self.read1(n - len(buf))
            else:
                return buf
            if len(data) == 0:
                # EOF
                return buf
            buf += data

    def _update_crc(self, newdata, eof):
        """Fold newdata into the running CRC-32.

        When eof is true the result is compared with the expected value and
        BadZipfile is raised on a mismatch.
        """
        # Update the CRC using the given data.
        if self._expected_crc is None:
            # No need to compute the CRC if we don't have a reference value
            return
        self._running_crc = crc32(newdata, self._running_crc) & 0xffffffff
        # Check the CRC if we're at the end of the file
        if eof and self._running_crc != self._expected_crc:
            raise BadZipfile("Bad CRC-32 for file %r" % self.name)

    def read1(self, n):
        """Read up to n bytes with at most one read() system call.

        A negative n or None means "as much as is available".
        """

        # Simplify algorithm (branching) by transforming negative n to large n.
        # None is tested first so it is never compared with an integer.
        if n is None or n < 0:
            n = self.MAX_N

        # Bytes available in read buffer.
        len_readbuffer = len(self._readbuffer) - self._offset

        # Read from file.
        if self._compress_left > 0 and n > len_readbuffer + len(self._unconsumed):
            nbytes = n - len_readbuffer - len(self._unconsumed)
            nbytes = max(nbytes, self.MIN_READ_SIZE)
            nbytes = min(nbytes, self._compress_left)

            data = self._fileobj.read(nbytes)
            self._compress_left -= len(data)

            if data and self._decrypter is not None:
                data = ''.join(map(self._decrypter, data))

            if self._compress_type == ZIP_STORED:
                self._update_crc(data, eof=(self._compress_left==0))
                self._readbuffer = self._readbuffer[self._offset:] + data
                self._offset = 0
            else:
                # Prepare deflated bytes for decompression.
                self._unconsumed += data

        # Handle unconsumed data.
        if (len(self._unconsumed) > 0 and n > len_readbuffer and
            self._compress_type == ZIP_DEFLATED):
            data = self._decompressor.decompress(
                self._unconsumed,
                max(n - len_readbuffer, self.MIN_READ_SIZE)
            )

            self._unconsumed = self._decompressor.unconsumed_tail
            eof = len(self._unconsumed) == 0 and self._compress_left == 0
            if eof:
                data += self._decompressor.flush()

            self._update_crc(data, eof=eof)
            self._readbuffer = self._readbuffer[self._offset:] + data
            self._offset = 0

        # Read from buffer.
        data = self._readbuffer[self._offset: self._offset + n]
        self._offset += len(data)
        return data

    def close(self):
        """Close this object and, if requested at creation, the file object."""
        try:
            if self._close_fileobj:
                self._fileobj.close()
        finally:
            super(ZipExtFile, self).close()
Tim Petersea5962f2007-03-12 18:07:52 +0000660
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000661
class ZipFile(object):
    """Class with methods to open, read, write, close, list zip files.

    z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=False)

    file: Either the path to the file, or a file-like object.
          If it is a path, the file will be opened and closed by ZipFile.
    mode: The mode can be either read "r", write "w" or append "a".
    compression: ZIP_STORED (no compression) or ZIP_DEFLATED (requires zlib).
    allowZip64: if True ZipFile will create files with ZIP64 extensions when
                needed, otherwise it will raise an exception when this would
                be necessary.

    """

    fp = None                   # Set here since __del__ checks it

    def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=False):
        """Open the ZIP file with mode read "r", write "w" or append "a".

        Raises RuntimeError for an unknown mode or compression method, or
        when ZIP_DEFLATED is requested but zlib is unavailable.  Errors from
        opening or parsing the archive propagate after the file has been
        closed (only if it was opened here).
        """
        if mode not in ("r", "w", "a"):
            raise RuntimeError('ZipFile() requires mode "r", "w", or "a"')

        if compression == ZIP_STORED:
            pass
        elif compression == ZIP_DEFLATED:
            if not zlib:
                raise RuntimeError(
                    "Compression requires the (missing) zlib module")
        else:
            raise RuntimeError("That compression method is not supported")

        self._allowZip64 = allowZip64
        self._didModify = False
        self.debug = 0  # Level of printing: 0 through 3
        self.NameToInfo = {}    # Find file info given name
        self.filelist = []      # List of ZipInfo instances for archive
        self.compression = compression  # Method of compression
        self.mode = key = mode.replace('b', '')[0]
        self.pwd = None
        self._comment = ''

        # Check if we were passed a file-like object
        if isinstance(file, basestring):
            self._filePassed = 0
            self.filename = file
            modeDict = {'r' : 'rb', 'w': 'wb', 'a' : 'r+b'}
            try:
                self.fp = open(file, modeDict[mode])
            except IOError:
                if mode == 'a':
                    # Appending to a file that cannot be opened for update:
                    # fall back to creating it.
                    mode = key = 'w'
                    self.fp = open(file, modeDict[mode])
                else:
                    raise
        else:
            self._filePassed = 1
            self.fp = file
            self.filename = getattr(file, 'name', None)

        try:
            if key == 'r':
                self._RealGetContents()
            elif key == 'w':
                # set the modified flag so central directory gets written
                # even if no files are added to the archive
                self._didModify = True
            elif key == 'a':
                try:
                    # See if file is a zip file
                    self._RealGetContents()
                    # seek to start of directory and overwrite
                    self.fp.seek(self.start_dir, 0)
                except BadZipfile:
                    # file is not a zip file, just append
                    self.fp.seek(0, 2)

                    # set the modified flag so central directory gets written
                    # even if no files are added to the archive
                    self._didModify = True
            else:
                raise RuntimeError('Mode must be "r", "w" or "a"')
        except:
            # Cleanup-and-reraise: never leave a half-initialised object
            # holding a file we opened ourselves.
            fp = self.fp
            self.fp = None
            if not self._filePassed:
                fp.close()
            raise

    def __enter__(self):
        """Return the archive itself for use in a ``with`` statement."""
        return self

    def __exit__(self, type, value, traceback):
        """Close the archive when the ``with`` block ends."""
        self.close()

    def _RealGetContents(self):
        """Read in the table of contents for the ZIP file.

        Populates self.filelist, self.NameToInfo, self._comment and
        self.start_dir.  Raises BadZipfile when the end-of-archive record
        cannot be found or the central directory is malformed or truncated.
        """
        fp = self.fp
        try:
            endrec = _EndRecData(fp)
        except IOError:
            raise BadZipfile("File is not a zip file")
        if not endrec:
            raise BadZipfile("File is not a zip file")
        if self.debug > 1:
            print(endrec)
        size_cd = endrec[_ECD_SIZE]             # bytes in central directory
        offset_cd = endrec[_ECD_OFFSET]         # offset of central directory
        self._comment = endrec[_ECD_COMMENT]    # archive comment

        # "concat" is zero, unless zip was concatenated to another file
        concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
        if endrec[_ECD_SIGNATURE] == stringEndArchive64:
            # If Zip64 extension structures are present, account for them
            concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)

        if self.debug > 2:
            inferred = concat + offset_cd
            print("given, inferred, offset %s %s %s"
                  % (offset_cd, inferred, concat))
        # self.start_dir:  Position of start of central directory
        self.start_dir = offset_cd + concat
        fp.seek(self.start_dir, 0)
        data = fp.read(size_cd)
        # Parse the directory from memory rather than from the real file.
        fp = cStringIO.StringIO(data)
        total = 0
        while total < size_cd:
            centdir = fp.read(sizeCentralDir)
            if centdir[0:4] != stringCentralDir:
                raise BadZipfile("Bad magic number for central directory")
            if len(centdir) != sizeCentralDir:
                # A short record would otherwise surface as struct.error.
                raise BadZipfile("Truncated central directory")
            centdir = struct.unpack(structCentralDir, centdir)
            if self.debug > 2:
                print(centdir)
            filename = fp.read(centdir[_CD_FILENAME_LENGTH])
            # Create ZipInfo instance to store file information
            x = ZipInfo(filename)
            x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
            x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
            x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
            (x.create_version, x.create_system, x.extract_version, x.reserved,
                x.flag_bits, x.compress_type, t, d,
                x.CRC, x.compress_size, x.file_size) = centdir[1:12]
            x.volume, x.internal_attr, x.external_attr = centdir[15:18]
            # Convert date/time code to (year, month, day, hour, min, sec)
            x._raw_time = t
            x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
                                     t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )

            x._decodeExtra()
            x.header_offset = x.header_offset + concat
            x.filename = x._decodeFilename()
            self.filelist.append(x)
            self.NameToInfo[x.filename] = x

            # update total bytes read from central directory
            total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH]
                     + centdir[_CD_EXTRA_FIELD_LENGTH]
                     + centdir[_CD_COMMENT_LENGTH])

            if self.debug > 2:
                print("total %s" % (total,))

    def namelist(self):
        """Return a list of file names in the archive."""
        return [zinfo.filename for zinfo in self.filelist]

    def infolist(self):
        """Return a list of class ZipInfo instances for files in the
        archive."""
        return self.filelist

    def printdir(self):
        """Print a table of contents for the zip file."""
        print("%-46s %19s %12s" % ("File Name", "Modified    ", "Size"))
        for zinfo in self.filelist:
            date = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time[:6]
            print("%-46s %s %12d" % (zinfo.filename, date, zinfo.file_size))

    def testzip(self):
        """Read all the files and check the CRC.

        Returns the name of the first member whose read raises BadZipfile,
        or None when every member reads cleanly.
        """
        chunk_size = 2 ** 20
        for zinfo in self.filelist:
            try:
                # Read by chunks, to avoid an OverflowError or a
                # MemoryError with very large embedded files.
                with self.open(zinfo.filename, "r") as f:
                    while f.read(chunk_size):     # Check CRC-32
                        pass
            except BadZipfile:
                return zinfo.filename

    def getinfo(self, name):
        """Return the instance of ZipInfo given 'name'.

        Raises KeyError when the archive has no member called 'name'.
        """
        info = self.NameToInfo.get(name)
        if info is None:
            raise KeyError(
                'There is no item named %r in the archive' % name)

        return info

    def setpassword(self, pwd):
        """Set default password for encrypted files."""
        self.pwd = pwd

    @property
    def comment(self):
        """The comment text associated with the ZIP file."""
        return self._comment

    @comment.setter
    def comment(self, comment):
        # check for valid comment length; exactly ZIP_MAX_COMMENT bytes
        # is still legal, only longer comments are truncated
        if len(comment) > ZIP_MAX_COMMENT:
            if self.debug:
                print('Archive comment is too long; truncating to %d bytes'
                        % ZIP_MAX_COMMENT)
            comment = comment[:ZIP_MAX_COMMENT]
        self._comment = comment
        self._didModify = True

    def read(self, name, pwd=None):
        """Return file bytes (as a string) for name."""
        # Close the member stream deterministically instead of leaving the
        # extra file handle to the garbage collector.
        with self.open(name, "r", pwd) as member:
            return member.read()

    def open(self, name, mode="r", pwd=None):
        """Return file-like object for 'name'.

        'name' may be a member name or a ZipInfo instance.  'pwd' overrides
        the default password for encrypted members.  Raises RuntimeError
        for a bad mode, a closed archive, or a missing/incorrect password,
        and BadZipfile when the local file header is inconsistent.
        """
        if mode not in ("r", "U", "rU"):
            raise RuntimeError('open() requires mode "r", "U", or "rU"')
        if not self.fp:
            raise RuntimeError(
                "Attempt to read ZIP archive that was already closed")

        # Only open a new file for instances where we were not
        # given a file object in the constructor
        if self._filePassed:
            zef_file = self.fp
            should_close = False
        else:
            zef_file = open(self.filename, 'rb')
            should_close = True

        try:
            # Make sure we have an info object
            if isinstance(name, ZipInfo):
                # 'name' is already an info object
                zinfo = name
            else:
                # Get info object for name
                zinfo = self.getinfo(name)

            zef_file.seek(zinfo.header_offset, 0)

            # Skip the file header:
            fheader = zef_file.read(sizeFileHeader)
            if fheader[0:4] != stringFileHeader:
                raise BadZipfile("Bad magic number for file header")

            fheader = struct.unpack(structFileHeader, fheader)
            fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
            if fheader[_FH_EXTRA_FIELD_LENGTH]:
                zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])

            if fname != zinfo.orig_filename:
                raise BadZipfile(
                    'File name in directory "%s" and header "%s" differ.' % (
                        zinfo.orig_filename, fname))

            # check for encrypted flag & handle password
            is_encrypted = zinfo.flag_bits & 0x1
            zd = None
            if is_encrypted:
                if not pwd:
                    pwd = self.pwd
                if not pwd:
                    raise RuntimeError("File %s is encrypted, "
                                       "password required for extraction"
                                       % name)

                zd = _ZipDecrypter(pwd)
                # The first 12 bytes in the cypher stream is an encryption header
                #  used to strengthen the algorithm. The first 11 bytes are
                #  completely random, while the 12th contains the MSB of the CRC,
                #  or the MSB of the file time depending on the header type
                #  and is used to check the correctness of the password.
                enc_header = zef_file.read(12)
                h = map(zd, enc_header[0:12])
                if zinfo.flag_bits & 0x8:
                    # compare against the file type from extended local headers
                    check_byte = (zinfo._raw_time >> 8) & 0xff
                else:
                    # compare against the CRC otherwise
                    check_byte = (zinfo.CRC >> 24) & 0xff
                if ord(h[11]) != check_byte:
                    raise RuntimeError("Bad password for file", name)

            return ZipExtFile(zef_file, mode, zinfo, zd,
                    close_fileobj=should_close)
        except:
            # Cleanup-and-reraise: do not leak the handle opened above.
            if should_close:
                zef_file.close()
            raise

    def extract(self, member, path=None, pwd=None):
        """Extract a member from the archive to the current working directory,
           using its full name. Its file information is extracted as accurately
           as possible. `member' may be a filename or a ZipInfo object. You can
           specify a different directory using `path'.

           Returns the path of the file or directory that was created.
        """
        if not isinstance(member, ZipInfo):
            member = self.getinfo(member)

        if path is None:
            path = os.getcwd()

        return self._extract_member(member, path, pwd)

    def extractall(self, path=None, members=None, pwd=None):
        """Extract all members from the archive to the current working
           directory. `path' specifies a different directory to extract to.
           `members' is optional and must be a subset of the list returned
           by namelist().
        """
        if members is None:
            members = self.namelist()

        for zipinfo in members:
            self.extract(zipinfo, path, pwd)

    def _extract_member(self, member, targetpath, pwd):
        """Extract the ZipInfo object 'member' to a physical
           file on the path targetpath.

           The member name comes from the archive and is untrusted: it is
           always interpreted relative to targetpath.  Drive letters,
           leading separators and "." / ".." components are dropped so an
           entry cannot be written outside the target directory.

           Returns the normalised path that was written or created.
        """
        # Strip trailing path separator, unless it represents the root.
        if (targetpath[-1:] in (os.path.sep, os.path.altsep)
            and len(os.path.splitdrive(targetpath)[1]) > 1):
            targetpath = targetpath[:-1]

        # build the destination pathname, replacing
        # forward slashes to platform specific separators.
        arcname = member.filename.replace('/', os.path.sep)
        if os.path.altsep:
            arcname = arcname.replace(os.path.altsep, os.path.sep)
        # interpret absolute pathname as relative, remove drive letter,
        # redundant separators, "." and ".." components.
        arcname = os.path.splitdrive(arcname)[1]
        arcname = os.path.sep.join(
            part for part in arcname.split(os.path.sep)
            if part not in ('', os.path.curdir, os.path.pardir))

        targetpath = os.path.join(targetpath, arcname)
        targetpath = os.path.normpath(targetpath)

        # Create all upper directories if necessary.
        upperdirs = os.path.dirname(targetpath)
        if upperdirs and not os.path.exists(upperdirs):
            os.makedirs(upperdirs)

        # A trailing "/" in the stored name marks a directory entry.
        if member.filename.endswith('/'):
            if not os.path.isdir(targetpath):
                os.mkdir(targetpath)
            return targetpath

        with self.open(member, pwd=pwd) as source, \
             open(targetpath, "wb") as target:
            shutil.copyfileobj(source, target)

        return targetpath

    def _writecheck(self, zinfo):
        """Check for errors before writing a file to the archive.

        Raises RuntimeError for a wrong mode, closed archive or unsupported
        compression, and LargeZipFile when ZIP64 would be needed but
        allowZip64 is false.
        """
        if zinfo.filename in self.NameToInfo:
            if self.debug:      # Warning for duplicate names
                print("Duplicate name: %s" % (zinfo.filename,))
        if self.mode not in ("w", "a"):
            raise RuntimeError('write() requires mode "w" or "a"')
        if not self.fp:
            raise RuntimeError(
                "Attempt to write ZIP archive that was already closed")
        if zinfo.compress_type == ZIP_DEFLATED and not zlib:
            raise RuntimeError(
                "Compression requires the (missing) zlib module")
        if zinfo.compress_type not in (ZIP_STORED, ZIP_DEFLATED):
            raise RuntimeError(
                "That compression method is not supported")
        if zinfo.file_size > ZIP64_LIMIT:
            if not self._allowZip64:
                raise LargeZipFile("Filesize would require ZIP64 extensions")
        if zinfo.header_offset > ZIP64_LIMIT:
            if not self._allowZip64:
                raise LargeZipFile("Zipfile size would require ZIP64 extensions")

    def write(self, filename, arcname=None, compress_type=None):
        """Put the bytes from filename into the archive under the name
        arcname.

        'arcname' defaults to 'filename' with any drive letter and leading
        separators removed.  'compress_type' overrides the archive-wide
        compression method for this entry.  Directories are stored as
        empty entries whose name ends in "/".
        """
        if not self.fp:
            raise RuntimeError(
                  "Attempt to write to ZIP archive that was already closed")

        st = os.stat(filename)
        isdir = stat.S_ISDIR(st.st_mode)
        mtime = time.localtime(st.st_mtime)
        date_time = mtime[0:6]
        # Create ZipInfo instance to store file information
        if arcname is None:
            arcname = filename
        arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
        while arcname[0] in (os.sep, os.altsep):
            arcname = arcname[1:]
        if isdir:
            arcname += '/'
        zinfo = ZipInfo(arcname, date_time)
        zinfo.external_attr = (st[0] & 0xFFFF) << 16      # Unix attributes
        if compress_type is None:
            zinfo.compress_type = self.compression
        else:
            zinfo.compress_type = compress_type

        zinfo.file_size = st.st_size
        zinfo.flag_bits = 0x00
        zinfo.header_offset = self.fp.tell()    # Start of header bytes

        self._writecheck(zinfo)
        self._didModify = True

        if isdir:
            zinfo.file_size = 0
            zinfo.compress_size = 0
            zinfo.CRC = 0
            self.filelist.append(zinfo)
            self.NameToInfo[zinfo.filename] = zinfo
            self.fp.write(zinfo.FileHeader())
            return

        with open(filename, "rb") as fp:
            # Must overwrite CRC and sizes with correct data later
            zinfo.CRC = CRC = 0
            zinfo.compress_size = compress_size = 0
            zinfo.file_size = file_size = 0
            self.fp.write(zinfo.FileHeader())
            if zinfo.compress_type == ZIP_DEFLATED:
                cmpr = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                     zlib.DEFLATED, -15)
            else:
                cmpr = None
            while True:
                buf = fp.read(1024 * 8)
                if not buf:
                    break
                file_size = file_size + len(buf)
                CRC = crc32(buf, CRC) & 0xffffffff
                if cmpr:
                    buf = cmpr.compress(buf)
                    compress_size = compress_size + len(buf)
                self.fp.write(buf)
        if cmpr:
            buf = cmpr.flush()
            compress_size = compress_size + len(buf)
            self.fp.write(buf)
            zinfo.compress_size = compress_size
        else:
            zinfo.compress_size = file_size
        zinfo.CRC = CRC
        zinfo.file_size = file_size
        # Seek backwards and write CRC and file sizes
        position = self.fp.tell()       # Preserve current position in file
        self.fp.seek(zinfo.header_offset + 14, 0)
        self.fp.write(struct.pack("<LLL", zinfo.CRC, zinfo.compress_size,
              zinfo.file_size))
        self.fp.seek(position, 0)
        self.filelist.append(zinfo)
        self.NameToInfo[zinfo.filename] = zinfo

    def writestr(self, zinfo_or_arcname, bytes, compress_type=None):
        """Write a file into the archive.  The contents is the string
        'bytes'.  'zinfo_or_arcname' is either a ZipInfo instance or
        the name of the file in the archive.

        'compress_type', when given, overrides the compression method of
        the entry (including one carried by a supplied ZipInfo).
        """
        if not isinstance(zinfo_or_arcname, ZipInfo):
            zinfo = ZipInfo(filename=zinfo_or_arcname,
                            date_time=time.localtime(time.time())[:6])

            zinfo.compress_type = self.compression
            zinfo.external_attr = 0o600 << 16
        else:
            zinfo = zinfo_or_arcname

        if not self.fp:
            raise RuntimeError(
                  "Attempt to write to ZIP archive that was already closed")

        if compress_type is not None:
            zinfo.compress_type = compress_type

        zinfo.file_size = len(bytes)            # Uncompressed size
        zinfo.header_offset = self.fp.tell()    # Start of header bytes
        self._writecheck(zinfo)
        self._didModify = True
        zinfo.CRC = crc32(bytes) & 0xffffffff       # CRC-32 checksum
        if zinfo.compress_type == ZIP_DEFLATED:
            co = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                 zlib.DEFLATED, -15)
            bytes = co.compress(bytes) + co.flush()
            zinfo.compress_size = len(bytes)    # Compressed size
        else:
            zinfo.compress_size = zinfo.file_size
        self.fp.write(zinfo.FileHeader())
        self.fp.write(bytes)
        self.fp.flush()
        if zinfo.flag_bits & 0x08:
            # Write CRC and file sizes after the file data
            self.fp.write(struct.pack("<LLL", zinfo.CRC, zinfo.compress_size,
                  zinfo.file_size))
        self.filelist.append(zinfo)
        self.NameToInfo[zinfo.filename] = zinfo

    def __del__(self):
        """Call the "close()" method in case the user forgot."""
        self.close()

    def close(self):
        """Close the file, and for mode "w" and "a" write the ending
        records.

        Calling close() on an already-closed archive is a no-op.  The
        underlying file is closed only when it was opened by this object,
        and that happens even if writing the ending records fails.
        """
        if self.fp is None:
            return

        try:
            if self.mode in ("w", "a") and self._didModify: # write ending records
                count = 0
                pos1 = self.fp.tell()
                for zinfo in self.filelist:         # write central directory
                    count = count + 1
                    dt = zinfo.date_time
                    dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
                    dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
                    # Values too large for the 32-bit fields are collected
                    # here and emitted in a ZIP64 extra field instead.
                    extra = []
                    if zinfo.file_size > ZIP64_LIMIT \
                            or zinfo.compress_size > ZIP64_LIMIT:
                        extra.append(zinfo.file_size)
                        extra.append(zinfo.compress_size)
                        file_size = 0xffffffff
                        compress_size = 0xffffffff
                    else:
                        file_size = zinfo.file_size
                        compress_size = zinfo.compress_size

                    if zinfo.header_offset > ZIP64_LIMIT:
                        extra.append(zinfo.header_offset)
                        header_offset = 0xffffffff
                    else:
                        header_offset = zinfo.header_offset

                    extra_data = zinfo.extra
                    if extra:
                        # Append a ZIP64 field to the extra's
                        extra_data = struct.pack(
                                '<HH' + 'Q'*len(extra),
                                1, 8*len(extra), *extra) + extra_data

                        extract_version = max(45, zinfo.extract_version)
                        create_version = max(45, zinfo.create_version)
                    else:
                        extract_version = zinfo.extract_version
                        create_version = zinfo.create_version

                    try:
                        filename, flag_bits = zinfo._encodeFilenameFlags()
                        centdir = struct.pack(structCentralDir,
                        stringCentralDir, create_version,
                        zinfo.create_system, extract_version, zinfo.reserved,
                        flag_bits, zinfo.compress_type, dostime, dosdate,
                        zinfo.CRC, compress_size, file_size,
                        len(filename), len(extra_data), len(zinfo.comment),
                        0, zinfo.internal_attr, zinfo.external_attr,
                        header_offset)
                    except DeprecationWarning:
                        # Dump the offending field values before re-raising.
                        sys.stderr.write("%s\n" % ((structCentralDir,
                        stringCentralDir, create_version,
                        zinfo.create_system, extract_version, zinfo.reserved,
                        zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
                        zinfo.CRC, compress_size, file_size,
                        len(zinfo.filename), len(extra_data), len(zinfo.comment),
                        0, zinfo.internal_attr, zinfo.external_attr,
                        header_offset),))
                        raise
                    self.fp.write(centdir)
                    self.fp.write(filename)
                    self.fp.write(extra_data)
                    self.fp.write(zinfo.comment)

                pos2 = self.fp.tell()
                # Write end-of-zip-archive record
                centDirCount = count
                centDirSize = pos2 - pos1
                centDirOffset = pos1
                if (centDirCount >= ZIP_FILECOUNT_LIMIT or
                    centDirOffset > ZIP64_LIMIT or
                    centDirSize > ZIP64_LIMIT):
                    # Need to write the ZIP64 end-of-archive records
                    zip64endrec = struct.pack(
                            structEndArchive64, stringEndArchive64,
                            44, 45, 45, 0, 0, centDirCount, centDirCount,
                            centDirSize, centDirOffset)
                    self.fp.write(zip64endrec)

                    zip64locrec = struct.pack(
                            structEndArchive64Locator,
                            stringEndArchive64Locator, 0, pos2, 1)
                    self.fp.write(zip64locrec)
                    # The classic record carries saturated values; the real
                    # ones live in the ZIP64 record written above.
                    centDirCount = min(centDirCount, 0xFFFF)
                    centDirSize = min(centDirSize, 0xFFFFFFFF)
                    centDirOffset = min(centDirOffset, 0xFFFFFFFF)

                endrec = struct.pack(structEndArchive, stringEndArchive,
                                    0, 0, centDirCount, centDirCount,
                                    centDirSize, centDirOffset, len(self._comment))
                self.fp.write(endrec)
                self.fp.write(self._comment)
                self.fp.flush()
        finally:
            fp = self.fp
            self.fp = None
            if not self._filePassed:
                fp.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001281
1282
1283class PyZipFile(ZipFile):
Fred Drake484d7352000-10-02 21:14:52 +00001284 """Class to create ZIP archives with Python library files and packages."""
1285
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001286 def writepy(self, pathname, basename = ""):
1287 """Add all files from "pathname" to the ZIP archive.
1288
Fred Drake484d7352000-10-02 21:14:52 +00001289 If pathname is a package directory, search the directory and
1290 all package subdirectories recursively for all *.py and enter
1291 the modules into the archive. If pathname is a plain
1292 directory, listdir *.py and enter all modules. Else, pathname
1293 must be a Python *.py file and the module will be put into the
1294 archive. Added modules are always module.pyo or module.pyc.
1295 This method will compile the module.py into module.pyc if
1296 necessary.
1297 """
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001298 dir, name = os.path.split(pathname)
1299 if os.path.isdir(pathname):
1300 initname = os.path.join(pathname, "__init__.py")
1301 if os.path.isfile(initname):
1302 # This is a package directory, add it
1303 if basename:
1304 basename = "%s/%s" % (basename, name)
1305 else:
1306 basename = name
1307 if self.debug:
1308 print "Adding package in", pathname, "as", basename
1309 fname, arcname = self._get_codename(initname[0:-3], basename)
1310 if self.debug:
1311 print "Adding", arcname
1312 self.write(fname, arcname)
1313 dirlist = os.listdir(pathname)
1314 dirlist.remove("__init__.py")
1315 # Add all *.py files and package subdirectories
1316 for filename in dirlist:
1317 path = os.path.join(pathname, filename)
1318 root, ext = os.path.splitext(filename)
1319 if os.path.isdir(path):
1320 if os.path.isfile(os.path.join(path, "__init__.py")):
1321 # This is a package directory, add it
1322 self.writepy(path, basename) # Recursive call
1323 elif ext == ".py":
1324 fname, arcname = self._get_codename(path[0:-3],
1325 basename)
1326 if self.debug:
1327 print "Adding", arcname
1328 self.write(fname, arcname)
1329 else:
1330 # This is NOT a package directory, add its files at top level
1331 if self.debug:
1332 print "Adding files from directory", pathname
1333 for filename in os.listdir(pathname):
1334 path = os.path.join(pathname, filename)
1335 root, ext = os.path.splitext(filename)
1336 if ext == ".py":
1337 fname, arcname = self._get_codename(path[0:-3],
1338 basename)
1339 if self.debug:
1340 print "Adding", arcname
1341 self.write(fname, arcname)
1342 else:
1343 if pathname[-3:] != ".py":
1344 raise RuntimeError, \
Fred Drake5db246d2000-09-29 20:44:48 +00001345 'Files added with writepy() must end with ".py"'
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001346 fname, arcname = self._get_codename(pathname[0:-3], basename)
1347 if self.debug:
1348 print "Adding file", arcname
1349 self.write(fname, arcname)
1350
def _get_codename(self, pathname, basename):
    """Return (filename, archivename) for the path.

    Given a module name path (a path without the ".py" suffix), pick
    the compiled file to store and the name it gets in the archive,
    compiling the module first if necessary.  For example, given
    /python/lib/string and an empty basename, return
    (/python/lib/string.pyc, string.pyc).

    pathname -- path of the module without its ".py" extension
    basename -- archive directory prefix; when non-empty it is joined
                to the file name with "/"

    An up-to-date .pyo file is preferred, then an up-to-date .pyc
    file; otherwise the .py file is compiled to .pyc.  A compile
    error is reported on stdout and the .pyc path is still returned.
    """
    file_py = pathname + ".py"
    file_pyc = pathname + ".pyc"
    file_pyo = pathname + ".pyo"

    def is_current(compiled):
        # The source is only stat'ed when the compiled file exists, so a
        # missing compiled file never triggers a stat of the .py file.
        return (os.path.isfile(compiled) and
                os.stat(compiled).st_mtime >= os.stat(file_py).st_mtime)

    if is_current(file_pyo):
        fname = file_pyo    # Use .pyo file
    elif is_current(file_pyc):
        fname = file_pyc
    else:
        # No usable compiled file: (re)build the .pyc from source.
        import py_compile
        if self.debug:
            print("Compiling %s" % file_py)
        try:
            py_compile.compile(file_py, file_pyc, doraise=True)
        except py_compile.PyCompileError as err:
            print(err.msg)
        fname = file_pyc

    archivename = os.path.basename(fname)
    if basename:
        archivename = "%s/%s" % (basename, archivename)
    return (fname, archivename)
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001380
1381
def main(args = None):
    """Command-line interface: list, test, extract or create a zipfile.

    args -- list of command-line arguments; defaults to sys.argv[1:].
            The first argument selects the action:
              -l zipfile            print a listing of the archive
              -t zipfile            test the archive members
              -e zipfile target     extract the archive into target
              -c zipfile src ...    create the archive from the sources

    Prints the usage text and exits with status 1 when the action is
    unknown or the number of arguments does not match the action.
    """
    import textwrap
    USAGE=textwrap.dedent("""\
        Usage:
            zipfile.py -l zipfile.zip        # Show listing of a zipfile
            zipfile.py -t zipfile.zip        # Test if a zipfile is valid
            zipfile.py -e zipfile.zip target # Extract zipfile into target dir
            zipfile.py -c zipfile.zip src ... # Create zipfile from sources
        """)
    if args is None:
        args = sys.argv[1:]

    if not args or args[0] not in ('-l', '-c', '-e', '-t'):
        print(USAGE)
        sys.exit(1)

    if args[0] == '-l':
        if len(args) != 2:
            print(USAGE)
            sys.exit(1)
        with ZipFile(args[1], 'r') as zf:
            zf.printdir()

    elif args[0] == '-t':
        if len(args) != 2:
            print(USAGE)
            sys.exit(1)
        with ZipFile(args[1], 'r') as zf:
            badfile = zf.testzip()
        if badfile:
            print("The following enclosed file is corrupted: {!r}".format(badfile))
        print("Done testing")

    elif args[0] == '-e':
        if len(args) != 3:
            print(USAGE)
            sys.exit(1)

        # Delegate to the library's extraction code instead of opening
        # every member name as a file: directory entries (names ending
        # in "/") must be created as directories, not written to.
        with ZipFile(args[1], 'r') as zf:
            zf.extractall(args[2])

    elif args[0] == '-c':
        if len(args) < 3:
            print(USAGE)
            sys.exit(1)

        def addToZip(zf, path, zippath):
            # Store regular files; recurse into directories.
            if os.path.isfile(path):
                zf.write(path, zippath, ZIP_DEFLATED)
            elif os.path.isdir(path):
                for nm in os.listdir(path):
                    addToZip(zf,
                            os.path.join(path, nm), os.path.join(zippath, nm))
            # else: ignore

        with ZipFile(args[1], 'w', allowZip64=True) as zf:
            for src in args[2:]:
                zippath = os.path.basename(src)
                if not zippath:
                    # "dir/" has an empty basename; use the directory's
                    # own name so its members keep their prefix.
                    zippath = os.path.basename(os.path.dirname(src))
                if zippath in ('', os.curdir, os.pardir):
                    # "." and ".." are not meaningful archive prefixes.
                    zippath = ''
                addToZip(zf, src, zippath)
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001451
# Run the command-line interface when this file is executed as a script
# rather than imported as a module.
if __name__ == "__main__":
    main()