blob: d9181f2393fc4a0ee39c75f97f7413cc4730f3dd [file] [log] [blame]
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001"""
2Read and write ZIP files.
3"""
Georg Brandl62416bc2008-01-07 18:47:44 +00004import struct, os, time, sys, shutil
Martin v. Löwis0dfcfc82009-01-24 14:00:33 +00005import binascii, cStringIO, stat
Antoine Pitrou94c33eb2010-01-27 20:59:50 +00006import io
7import re
Guido van Rossum32abe6f2000-03-31 17:30:02 +00008
9try:
Tim Peterse1190062001-01-15 03:34:38 +000010 import zlib # We may need its compression method
Gregory P. Smithb89a0962008-03-19 01:46:10 +000011 crc32 = zlib.crc32
Guido van Rossum9c673f32001-04-10 15:37:12 +000012except ImportError:
Guido van Rossum32abe6f2000-03-31 17:30:02 +000013 zlib = None
Gregory P. Smithb89a0962008-03-19 01:46:10 +000014 crc32 = binascii.crc32
Guido van Rossum32abe6f2000-03-31 17:30:02 +000015
Skip Montanaro40fc1602001-03-01 04:27:19 +000016__all__ = ["BadZipfile", "error", "ZIP_STORED", "ZIP_DEFLATED", "is_zipfile",
Ronald Oussoren143cefb2006-06-15 08:14:18 +000017 "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile" ]
Skip Montanaro40fc1602001-03-01 04:27:19 +000018
class BadZipfile(Exception):
    """Error raised by this module for archives it cannot process.

    Examples in this file include a CRC-32 mismatch while reading a member
    and an archive that spans multiple disks.
    """
Ronald Oussoren143cefb2006-06-15 08:14:18 +000021
22
class LargeZipFile(Exception):
    """Signals that an archive being written needs ZIP64 support.

    Raised when writing a zipfile that would require the ZIP64 extensions
    while those extensions are disabled.
    """
28
error = BadZipfile      # The exception raised by this module

# Sizes above this value cannot be stored in the regular header fields;
# ZipInfo.FileHeader() switches to the ZIP64 extra record beyond it.
ZIP64_LIMIT = (1 << 31) - 1
# NOTE(review): presumably the entry count at which ZIP64 becomes necessary;
# it is not referenced in the part of the file reviewed here -- confirm.
ZIP_FILECOUNT_LIMIT = 1 << 16
# Largest value representable in the 16-bit comment-size field of the
# "end of central directory" record (the trailing "H" in structEndArchive).
ZIP_MAX_COMMENT = (1 << 16) - 1

# constants for Zip file compression methods
ZIP_STORED = 0
ZIP_DEFLATED = 8
# Other ZIP compression methods not supported
39
Martin v. Löwis8c436412008-07-03 12:51:14 +000040# Below are some formats and associated data for reading/writing headers using
41# the struct module. The names and structures of headers/records are those used
42# in the PKWARE description of the ZIP file format:
43# http://www.pkware.com/documents/casestudies/APPNOTE.TXT
44# (URL valid as of January 2008)
Ronald Oussoren143cefb2006-06-15 08:14:18 +000045
# The "end of central directory" structure, magic number, size, and indices
# (section V.I in the format document)
# Layout ("<" = little-endian, standard sizes): 4-byte signature, four
# unsigned 16-bit fields, two unsigned 32-bit fields and one unsigned 16-bit
# field -- 22 bytes in total.
structEndArchive = "<4s4H2LH"
stringEndArchive = "PK\005\006"
sizeEndCentDir = struct.calcsize(structEndArchive)

# Positions of the fields in the list produced by unpacking structEndArchive
_ECD_SIGNATURE = 0
_ECD_DISK_NUMBER = 1
_ECD_DISK_START = 2
_ECD_ENTRIES_THIS_DISK = 3
_ECD_ENTRIES_TOTAL = 4
_ECD_SIZE = 5
_ECD_OFFSET = 6
_ECD_COMMENT_SIZE = 7
# These last two indices are not part of the structure as defined in the
# spec, but they are used internally by this module as a convenience
_ECD_COMMENT = 8
_ECD_LOCATION = 9
63_ECD_LOCATION = 9
64
# The "central directory" structure, magic number, size, and indices
# of entries in the structure (section V.F in the format document)
# Layout ("<" = little-endian, standard sizes): 4-byte signature, four
# single bytes, four 16-bit fields, three 32-bit fields, five 16-bit fields
# and two 32-bit fields -- 46 bytes in total.
structCentralDir = "<4s4B4HL2L5H2L"
stringCentralDir = "PK\001\002"
sizeCentralDir = struct.calcsize(structCentralDir)

# indexes of entries in the central directory structure
_CD_SIGNATURE = 0
_CD_CREATE_VERSION = 1
_CD_CREATE_SYSTEM = 2
_CD_EXTRACT_VERSION = 3
_CD_EXTRACT_SYSTEM = 4
_CD_FLAG_BITS = 5
_CD_COMPRESS_TYPE = 6
_CD_TIME = 7
_CD_DATE = 8
_CD_CRC = 9
_CD_COMPRESSED_SIZE = 10
_CD_UNCOMPRESSED_SIZE = 11
_CD_FILENAME_LENGTH = 12
_CD_EXTRA_FIELD_LENGTH = 13
_CD_COMMENT_LENGTH = 14
_CD_DISK_NUMBER_START = 15
_CD_INTERNAL_FILE_ATTRIBUTES = 16
_CD_EXTERNAL_FILE_ATTRIBUTES = 17
_CD_LOCAL_HEADER_OFFSET = 18
91
# The "local file header" structure, magic number, size, and indices
# (section V.A in the format document)
# Layout ("<" = little-endian, standard sizes): 4-byte signature, two single
# bytes, four 16-bit fields, three 32-bit fields and two 16-bit fields --
# 30 bytes in total.  The file name and extra data follow the fixed part.
structFileHeader = "<4s2B4HL2L2H"
stringFileHeader = "PK\003\004"
sizeFileHeader = struct.calcsize(structFileHeader)

# Positions of the fields in the tuple produced by unpacking structFileHeader
_FH_SIGNATURE = 0
_FH_EXTRACT_VERSION = 1
_FH_EXTRACT_SYSTEM = 2
_FH_GENERAL_PURPOSE_FLAG_BITS = 3
_FH_COMPRESSION_METHOD = 4
_FH_LAST_MOD_TIME = 5
_FH_LAST_MOD_DATE = 6
_FH_CRC = 7
_FH_COMPRESSED_SIZE = 8
_FH_UNCOMPRESSED_SIZE = 9
_FH_FILENAME_LENGTH = 10
_FH_EXTRA_FIELD_LENGTH = 11
110
# The "Zip64 end of central directory locator" structure, magic number, and size
# Layout: 4-byte signature, 32-bit disk number, 64-bit relative offset and
# 32-bit disk count (unpacked as sig, diskno, reloff, disks in
# _EndRecData64) -- 20 bytes in total.
structEndArchive64Locator = "<4sLQL"
stringEndArchive64Locator = "PK\x06\x07"
sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator)
Martin v. Löwis8c436412008-07-03 12:51:14 +0000115
# The "Zip64 end of central directory" record, magic number, size, and indices
# (section V.G in the format document)
# Layout ("<" = little-endian, standard sizes): 4-byte signature, one 64-bit
# field, two 16-bit fields, two 32-bit fields and four 64-bit fields --
# 56 bytes in total.
structEndArchive64 = "<4sQ2H2L4Q"
stringEndArchive64 = "PK\x06\x06"
sizeEndCentDir64 = struct.calcsize(structEndArchive64)

# Positions of the fields in the tuple produced by unpacking structEndArchive64
_CD64_SIGNATURE = 0
_CD64_DIRECTORY_RECSIZE = 1
_CD64_CREATE_VERSION = 2
_CD64_EXTRACT_VERSION = 3
_CD64_DISK_NUMBER = 4
_CD64_DISK_NUMBER_START = 5
_CD64_NUMBER_ENTRIES_THIS_DISK = 6
_CD64_NUMBER_ENTRIES_TOTAL = 7
_CD64_DIRECTORY_SIZE = 8
_CD64_OFFSET_START_CENTDIR = 9
132
def _check_zipfile(fp):
    """Return True when an "end of central directory" record is found in fp.

    An IOError raised while scanning the file is treated as "not a ZIP
    file" and yields False.
    """
    try:
        # _EndRecData() gives a non-empty list on success and None otherwise.
        found = bool(_EndRecData(fp))
    except IOError:
        found = False
    return found
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000140
def is_zipfile(filename):
    """Report whether *filename* looks like a ZIP archive.

    *filename* is either a path, which is opened in binary mode for the
    duration of the check, or an object with a read() method, which is
    inspected directly.  Any IOError raised along the way (for example when
    the path cannot be opened) results in False.
    """
    try:
        if not hasattr(filename, "read"):
            with open(filename, "rb") as handle:
                return _check_zipfile(handle)
        return _check_zipfile(fp=filename)
    except IOError:
        return False
156
def _EndRecData64(fpin, offset, endrec):
    """Read the ZIP64 end-of-archive records and use them to update endrec.

    fpin   -- seekable file object for the archive.
    offset -- position of the regular "end of central directory" record,
              relative to the end of the file (it is passed to seek() with
              whence=2; the callers in this module pass a negative value).
    endrec -- list of end-record fields as built by _EndRecData(); it is
              updated in place and returned.

    endrec is returned untouched when the file is too short to hold the
    ZIP64 locator or record, or when either signature does not match.
    Raises BadZipfile for archives that span multiple disks.
    """
    try:
        fpin.seek(offset - sizeEndCentDir64Locator, 2)
    except IOError:
        # If the seek fails, the file is not large enough to contain a ZIP64
        # end-of-archive record, so just return the end record we were given.
        return endrec

    data = fpin.read(sizeEndCentDir64Locator)
    if len(data) != sizeEndCentDir64Locator:
        # Some file-like objects do not fail the seek above; a short read
        # then means there is no room for a locator.  Without this check
        # struct.unpack would raise struct.error.
        return endrec
    sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, data)
    if sig != stringEndArchive64Locator:
        return endrec

    if diskno != 0 or disks != 1:
        raise BadZipfile("zipfiles that span multiple disks are not supported")

    # Assume no 'zip64 extensible data'
    fpin.seek(offset - sizeEndCentDir64Locator - sizeEndCentDir64, 2)
    data = fpin.read(sizeEndCentDir64)
    if len(data) != sizeEndCentDir64:
        # Truncated record: keep the values from the regular end record.
        return endrec
    sig, sz, create_version, read_version, disk_num, disk_dir, \
            dircount, dircount2, dirsize, diroffset = \
            struct.unpack(structEndArchive64, data)
    if sig != stringEndArchive64:
        return endrec

    # Update the original endrec using data from the ZIP64 record
    endrec[_ECD_SIGNATURE] = sig
    endrec[_ECD_DISK_NUMBER] = disk_num
    endrec[_ECD_DISK_START] = disk_dir
    endrec[_ECD_ENTRIES_THIS_DISK] = dircount
    endrec[_ECD_ENTRIES_TOTAL] = dircount2
    endrec[_ECD_SIZE] = dirsize
    endrec[_ECD_OFFSET] = diroffset
    return endrec
194
195
def _EndRecData(fpin):
    """Return data from the "End of Central Directory" record, or None.

    The data is a list of the nine items in the ZIP "End of central dir"
    record followed by a tenth item, the file seek offset of this record.

    None is returned when no record can be located, including when the
    bytes following a candidate signature are too few to hold a complete
    record.  When a record is found, the result is passed through
    _EndRecData64() so that ZIP64 values, if present, replace the 32-bit
    ones."""

    # Determine file size
    fpin.seek(0, 2)
    filesize = fpin.tell()

    # Check to see if this is ZIP file with no archive comment (the
    # "end of central directory" structure should be the last item in the
    # file if this is the case).
    try:
        fpin.seek(-sizeEndCentDir, 2)
    except IOError:
        return None
    data = fpin.read()
    # The length check protects struct.unpack from file objects whose seek()
    # silently clamps instead of failing on files shorter than the record.
    if (len(data) == sizeEndCentDir and
            data[0:4] == stringEndArchive and
            data[-2:] == "\000\000"):
        # the signature is correct and there's no comment, unpack structure
        endrec = struct.unpack(structEndArchive, data)
        endrec = list(endrec)

        # Append a blank comment and record start offset
        endrec.append("")
        endrec.append(filesize - sizeEndCentDir)

        # Try to read the "Zip64 end of central directory" structure
        return _EndRecData64(fpin, -sizeEndCentDir, endrec)

    # Either this is not a ZIP file, or it is a ZIP file with an archive
    # comment.  Search the end of the file for the "end of central directory"
    # record signature. The comment is the last item in the ZIP file and may be
    # up to 64K long.  It is assumed that the "end of central directory" magic
    # number does not appear in the comment.
    maxCommentStart = max(filesize - (1 << 16) - sizeEndCentDir, 0)
    fpin.seek(maxCommentStart, 0)
    data = fpin.read()
    start = data.rfind(stringEndArchive)
    if start >= 0:
        # found the magic number; attempt to unpack and interpret
        recData = data[start:start+sizeEndCentDir]
        if len(recData) != sizeEndCentDir:
            # The signature sits too close to the end of the file for a
            # complete record to follow it: the file is corrupt or not a ZIP.
            return None
        endrec = list(struct.unpack(structEndArchive, recData))
        commentSize = endrec[_ECD_COMMENT_SIZE] #as claimed by the zip file
        comment = data[start+sizeEndCentDir:start+sizeEndCentDir+commentSize]
        endrec.append(comment)
        endrec.append(maxCommentStart + start)

        # Try to read the "Zip64 end of central directory" structure
        return _EndRecData64(fpin, maxCommentStart + start - filesize,
                             endrec)

    # Unable to find a valid end of central directory structure
    return None
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000250
Fred Drake484d7352000-10-02 21:14:52 +0000251
class ZipInfo(object):
    """Class with attributes describing each file in the ZIP archive."""

    __slots__ = (
            'orig_filename',
            'filename',
            'date_time',
            'compress_type',
            'comment',
            'extra',
            'create_system',
            'create_version',
            'extract_version',
            'reserved',
            'flag_bits',
            'volume',
            'internal_attr',
            'external_attr',
            'header_offset',
            'CRC',
            'compress_size',
            'file_size',
            '_raw_time',
        )

    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
        """Create an entry description with default header values.

        filename  -- member name; it is cut at the first null byte and
                     os.sep is replaced by "/" for the stored name.
        date_time -- (year, month, day, hour, min, sec) sequence.

        Raises ValueError when the year is earlier than 1980.
        """
        self.orig_filename = filename   # Original file name in archive

        # Terminate the file name at the first null byte.  Null bytes in file
        # names are used as tricks by viruses in archives.
        null_byte = filename.find(chr(0))
        if null_byte >= 0:
            filename = filename[0:null_byte]
        # This is used to ensure paths in generated ZIP files always use
        # forward slashes as the directory separator, as required by the
        # ZIP format specification.
        if os.sep != "/" and os.sep in filename:
            filename = filename.replace(os.sep, "/")

        self.filename = filename        # Normalized file name
        self.date_time = date_time      # year, month, day, hour, min, sec

        if date_time[0] < 1980:
            raise ValueError('ZIP does not support timestamps before 1980')

        # Standard values:
        self.compress_type = ZIP_STORED # Type of compression for the file
        self.comment = ""               # Comment for each file
        self.extra = ""                 # ZIP extra data
        if sys.platform == 'win32':
            self.create_system = 0          # System which created ZIP archive
        else:
            # Assume everything else is unix-y
            self.create_system = 3          # System which created ZIP archive
        self.create_version = 20        # Version which created ZIP archive
        self.extract_version = 20       # Version needed to extract archive
        self.reserved = 0               # Must be zero
        self.flag_bits = 0              # ZIP flag bits
        self.volume = 0                 # Volume number of file header
        self.internal_attr = 0          # Internal attributes
        self.external_attr = 0          # External file attributes
        # Other attributes are set by class ZipFile:
        # header_offset         Byte offset to the file header
        # CRC                   CRC-32 of the uncompressed file
        # compress_size         Size of the compressed file
        # file_size             Size of the uncompressed file

    def FileHeader(self):
        """Return the per-file header as a string.

        The result is the packed local file header followed by the encoded
        file name and the extra data.  When either size exceeds ZIP64_LIMIT
        a ZIP64 extra record is appended, both 32-bit size fields are set to
        0xffffffff, and extract_version/create_version are raised to at
        least 45 on this instance.
        """
        dt = self.date_time
        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
        if self.flag_bits & 0x08:
            # Set these to zero because we write them after the file data
            CRC = compress_size = file_size = 0
        else:
            CRC = self.CRC
            compress_size = self.compress_size
            file_size = self.file_size

        extra = self.extra

        if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
            # File is larger than what fits into a 4 byte integer,
            # fall back to the ZIP64 extension
            fmt = '<HHQQ'
            extra = extra + struct.pack(fmt,
                    1, struct.calcsize(fmt)-4, file_size, compress_size)
            file_size = 0xffffffff
            compress_size = 0xffffffff
            self.extract_version = max(45, self.extract_version)
            # Never lower a create_version that is already higher than the
            # version needed to extract.
            self.create_version = max(45, self.extract_version,
                                      self.create_version)

        filename, flag_bits = self._encodeFilenameFlags()
        header = struct.pack(structFileHeader, stringFileHeader,
                 self.extract_version, self.reserved, flag_bits,
                 self.compress_type, dostime, dosdate, CRC,
                 compress_size, file_size,
                 len(filename), len(extra))
        return header + filename + extra

    def _encodeFilenameFlags(self):
        """Return (encoded file name, flag bits) for writing a header.

        A unicode name is encoded as ASCII when possible; otherwise it is
        encoded as UTF-8 and bit 0x800 is added to the returned flags.
        Other names are returned unchanged.
        """
        if isinstance(self.filename, unicode):
            try:
                return self.filename.encode('ascii'), self.flag_bits
            except UnicodeEncodeError:
                return self.filename.encode('utf-8'), self.flag_bits | 0x800
        else:
            return self.filename, self.flag_bits

    def _decodeFilename(self):
        """Return the file name, decoded from UTF-8 if flag bit 0x800 is set."""
        if self.flag_bits & 0x800:
            return self.filename.decode('utf-8')
        else:
            return self.filename

    def _decodeExtra(self):
        """Apply the ZIP64 record (type 1) found in self.extra, if any.

        file_size, compress_size and header_offset are replaced, in that
        order, by the 64-bit values of the record whenever they currently
        hold the 0xffffffff placeholder.  Records of other types are skipped.

        Raises RuntimeError when the ZIP64 record has an unexpected length.
        """
        # Try to decode the extra field.
        extra = self.extra
        unpack = struct.unpack
        # Every record starts with a 4-byte (type, length) header; stop once
        # fewer bytes remain so trailing padding is ignored instead of making
        # struct.unpack fail.
        while len(extra) >= 4:
            tp, ln = unpack('<HH', extra[:4])
            if tp == 1:
                if ln >= 24:
                    counts = unpack('<QQQ', extra[4:28])
                elif ln == 16:
                    counts = unpack('<QQ', extra[4:20])
                elif ln == 8:
                    counts = unpack('<Q', extra[4:12])
                elif ln == 0:
                    counts = ()
                else:
                    raise RuntimeError("Corrupt extra field %s" % (ln,))

                idx = 0

                # ZIP64 extension (large files and/or large archives)
                if self.file_size in (0xffffffffffffffff, 0xffffffff):
                    self.file_size = counts[idx]
                    idx += 1

                if self.compress_size == 0xFFFFFFFF:
                    self.compress_size = counts[idx]
                    idx += 1

                if self.header_offset == 0xffffffff:
                    self.header_offset = counts[idx]
                    idx += 1

            extra = extra[ln+4:]
402 extra = extra[ln+4:]
Tim Petersa608bb22006-06-15 18:06:29 +0000403
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000404
Martin v. Löwisc6d626e2007-02-13 09:49:38 +0000405class _ZipDecrypter:
406 """Class to handle decryption of files stored within a ZIP archive.
407
408 ZIP supports a password-based form of encryption. Even though known
409 plaintext attacks have been found against it, it is still useful
Gregory P. Smithda407232008-01-20 01:32:00 +0000410 to be able to get data out of such a file.
Martin v. Löwisc6d626e2007-02-13 09:49:38 +0000411
412 Usage:
413 zd = _ZipDecrypter(mypwd)
414 plain_char = zd(cypher_char)
415 plain_text = map(zd, cypher_text)
416 """
417
418 def _GenerateCRCTable():
419 """Generate a CRC-32 table.
420
421 ZIP encryption uses the CRC32 one-byte primitive for scrambling some
422 internal keys. We noticed that a direct implementation is faster than
423 relying on binascii.crc32().
424 """
425 poly = 0xedb88320
426 table = [0] * 256
427 for i in range(256):
428 crc = i
429 for j in range(8):
430 if crc & 1:
431 crc = ((crc >> 1) & 0x7FFFFFFF) ^ poly
432 else:
433 crc = ((crc >> 1) & 0x7FFFFFFF)
434 table[i] = crc
435 return table
436 crctable = _GenerateCRCTable()
437
438 def _crc32(self, ch, crc):
439 """Compute the CRC32 primitive on one byte."""
440 return ((crc >> 8) & 0xffffff) ^ self.crctable[(crc ^ ord(ch)) & 0xff]
441
442 def __init__(self, pwd):
443 self.key0 = 305419896
444 self.key1 = 591751049
445 self.key2 = 878082192
446 for p in pwd:
447 self._UpdateKeys(p)
448
449 def _UpdateKeys(self, c):
450 self.key0 = self._crc32(c, self.key0)
451 self.key1 = (self.key1 + (self.key0 & 255)) & 4294967295
452 self.key1 = (self.key1 * 134775813 + 1) & 4294967295
453 self.key2 = self._crc32(chr((self.key1 >> 24) & 255), self.key2)
454
455 def __call__(self, c):
456 """Decrypt a single character."""
457 c = ord(c)
458 k = self.key2 | 2
459 c = c ^ (((k * (k^1)) >> 8) & 255)
460 c = chr(c)
461 self._UpdateKeys(c)
462 return c
463
class ZipExtFile(io.BufferedIOBase):
    """File-like object for reading an archive member.
       Is returned by ZipFile.open().
    """

    # Max size supported by decompressor.
    # Parenthesised on purpose: "1 << 31 - 1" parses as "1 << 30".
    MAX_N = (1 << 31) - 1

    # Read from compressed files in 4k blocks.
    MIN_READ_SIZE = 4096

    # Search for universal newlines or line chunks.
    PATTERN = re.compile(r'^(?P<chunk>[^\r\n]+)|(?P<newline>\n|\r\n?)')

    def __init__(self, fileobj, mode, zipinfo, decrypter=None):
        """Set up reading of one member.

        fileobj   -- object whose read() supplies the raw member bytes.
        mode      -- mode string; a 'U' in it enables universal newlines.
        zipinfo   -- provides compress_type, compress_size, filename and,
                     when present, the CRC used for verification.
        decrypter -- optional callable applied to every raw character.
        """
        self._fileobj = fileobj
        self._decrypter = decrypter

        self._compress_type = zipinfo.compress_type
        self._compress_size = zipinfo.compress_size
        self._compress_left = zipinfo.compress_size

        if self._compress_type == ZIP_DEFLATED:
            self._decompressor = zlib.decompressobj(-15)
        self._unconsumed = ''

        self._readbuffer = ''
        self._offset = 0

        self._universal = 'U' in mode
        self.newlines = None

        # Adjust read size for encrypted files since the first 12 bytes
        # are for the encryption/password information.
        if self._decrypter is not None:
            self._compress_left -= 12

        self.mode = mode
        self.name = zipinfo.filename

        if hasattr(zipinfo, 'CRC'):
            self._expected_crc = zipinfo.CRC
            self._running_crc = crc32(b'') & 0xffffffff
        else:
            self._expected_crc = None

    def readline(self, limit=-1):
        """Read and return a line from the stream.

        If limit is specified, at most limit bytes will be read.
        """

        if not self._universal and limit < 0:
            # Shortcut common case - newline found in buffer.
            i = self._readbuffer.find('\n', self._offset) + 1
            if i > 0:
                line = self._readbuffer[self._offset: i]
                self._offset = i
                return line

        if not self._universal:
            return io.BufferedIOBase.readline(self, limit)

        line = ''
        while limit < 0 or len(line) < limit:
            readahead = self.peek(2)
            if readahead == '':
                return line

            #
            # Search for universal newlines or line chunks.
            #
            # The pattern returns either a line chunk or a newline, but not
            # both. Combined with peek(2), we are assured that the sequence
            # '\r\n' is always retrieved completely and never split into
            # separate newlines - '\r', '\n' due to coincidental readaheads.
            #
            match = self.PATTERN.search(readahead)
            newline = match.group('newline')
            if newline is not None:
                if self.newlines is None:
                    self.newlines = []
                if newline not in self.newlines:
                    self.newlines.append(newline)
                self._offset += len(newline)
                return line + '\n'

            chunk = match.group('chunk')
            if limit >= 0:
                chunk = chunk[: limit - len(line)]

            self._offset += len(chunk)
            line += chunk

        return line

    def peek(self, n=1):
        """Returns buffered bytes without advancing the position."""
        if n > len(self._readbuffer) - self._offset:
            # Fill the buffer through read(), then step back over what was
            # consumed so the position is unchanged.
            chunk = self.read(n)
            self._offset -= len(chunk)

        # Return up to 512 bytes to reduce allocation overhead for tight loops.
        return self._readbuffer[self._offset: self._offset + 512]

    def readable(self):
        """Report that this object supports reading."""
        return True

    def read(self, n=-1):
        """Read and return up to n bytes.
        If the argument is omitted, None, or negative, data is read and returned until EOF is reached.
        """
        buf = ''
        if n is None:
            n = -1
        while True:
            if n < 0:
                data = self.read1(n)
            elif n > len(buf):
                data = self.read1(n - len(buf))
            else:
                return buf
            if len(data) == 0:
                return buf
            buf += data

    def _update_crc(self, newdata, eof):
        """Fold newdata into the running CRC and verify it once eof is set.

        Raises BadZipfile when the final CRC differs from the expected one.
        """
        # Update the CRC using the given data.
        if self._expected_crc is None:
            # No need to compute the CRC if we don't have a reference value
            return
        self._running_crc = crc32(newdata, self._running_crc) & 0xffffffff
        # Check the CRC if we're at the end of the file
        if eof and self._running_crc != self._expected_crc:
            raise BadZipfile("Bad CRC-32 for file %r" % self.name)

    def read1(self, n):
        """Read up to n bytes with at most one read() system call.

        A negative n or None means "as much as possible" (capped at MAX_N).
        """

        # Simplify algorithm (branching) by transforming negative n to large n.
        # None is tested first so it is never compared with an integer.
        if n is None or n < 0:
            n = self.MAX_N

        # Bytes available in read buffer.
        len_readbuffer = len(self._readbuffer) - self._offset

        # Read from file.
        if self._compress_left > 0 and n > len_readbuffer + len(self._unconsumed):
            nbytes = n - len_readbuffer - len(self._unconsumed)
            nbytes = max(nbytes, self.MIN_READ_SIZE)
            nbytes = min(nbytes, self._compress_left)

            data = self._fileobj.read(nbytes)
            self._compress_left -= len(data)

            if data and self._decrypter is not None:
                data = ''.join(map(self._decrypter, data))

            if self._compress_type == ZIP_STORED:
                self._update_crc(data, eof=(self._compress_left==0))
                self._readbuffer = self._readbuffer[self._offset:] + data
                self._offset = 0
            else:
                # Prepare deflated bytes for decompression.
                self._unconsumed += data

        # Handle unconsumed data.
        if (len(self._unconsumed) > 0 and n > len_readbuffer and
            self._compress_type == ZIP_DEFLATED):
            data = self._decompressor.decompress(
                self._unconsumed,
                max(n - len_readbuffer, self.MIN_READ_SIZE)
            )

            self._unconsumed = self._decompressor.unconsumed_tail
            eof = len(self._unconsumed) == 0 and self._compress_left == 0
            if eof:
                data += self._decompressor.flush()

            self._update_crc(data, eof=eof)
            self._readbuffer = self._readbuffer[self._offset:] + data
            self._offset = 0

        # Read from buffer.
        data = self._readbuffer[self._offset: self._offset + n]
        self._offset += len(data)
        return data
651
Tim Petersea5962f2007-03-12 18:07:52 +0000652
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000653
class ZipFile:
    """ Class with methods to open, read, write, close, list zip files.

    z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=False)

    file: Either the path to the file, or a file-like object.
          If it is a path, the file will be opened and closed by ZipFile.
    mode: The mode can be either read "r", write "w" or append "a".
    compression: ZIP_STORED (no compression) or ZIP_DEFLATED (requires zlib).
    allowZip64: if True ZipFile will create files with ZIP64 extensions when
                needed, otherwise it will raise an exception when this would
                be necessary.

    """

    fp = None                   # Set here since __del__ checks it

    def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=False):
        """Open the ZIP file with mode read "r", write "w" or append "a".

        Raises RuntimeError for an unknown mode or compression method, or
        when ZIP_DEFLATED is requested but zlib could not be imported.
        """
        if mode not in ("r", "w", "a"):
            raise RuntimeError('ZipFile() requires mode "r", "w", or "a"')

        if compression == ZIP_STORED:
            pass
        elif compression == ZIP_DEFLATED:
            if not zlib:
                raise RuntimeError(
                    "Compression requires the (missing) zlib module")
        else:
            raise RuntimeError("That compression method is not supported")

        self._allowZip64 = allowZip64
        self._didModify = False
        self.debug = 0  # Level of printing: 0 through 3
        self.NameToInfo = {}    # Find file info given name
        self.filelist = []      # List of ZipInfo instances for archive
        self.compression = compression  # Method of compression
        self.mode = key = mode.replace('b', '')[0]
        self.pwd = None
        self.comment = ''

        # Check if we were passed a file-like object
        if isinstance(file, basestring):
            self._filePassed = 0
            self.filename = file
            modeDict = {'r' : 'rb', 'w': 'wb', 'a' : 'r+b'}
            try:
                self.fp = open(file, modeDict[mode])
            except IOError:
                if mode == 'a':
                    # The file could not be opened for update; fall back
                    # to creating it.
                    mode = key = 'w'
                    self.fp = open(file, modeDict[mode])
                else:
                    raise
        else:
            self._filePassed = 1
            self.fp = file
            self.filename = getattr(file, 'name', None)

        if key == 'r':
            self._GetContents()
        elif key == 'w':
            # set the modified flag so central directory gets written
            # even if no files are added to the archive
            self._didModify = True
        elif key == 'a':
            try:
                # See if file is a zip file
                self._RealGetContents()
                # seek to start of directory and overwrite
                self.fp.seek(self.start_dir, 0)
            except BadZipfile:
                # file is not a zip file, just append
                self.fp.seek(0, 2)

                # set the modified flag so central directory gets written
                # even if no files are added to the archive
                self._didModify = True
        else:
            if not self._filePassed:
                self.fp.close()
            self.fp = None
            raise RuntimeError('Mode must be "r", "w" or "a"')

    def __enter__(self):
        """Return the archive itself so it can be used in a with block."""
        return self

    def __exit__(self, type, value, traceback):
        """Close the archive when the with block is left."""
        self.close()

    def _GetContents(self):
        """Read the directory, making sure we close the file if the format
        is bad."""
        try:
            self._RealGetContents()
        except BadZipfile:
            if not self._filePassed:
                self.fp.close()
                self.fp = None
            raise

    def _RealGetContents(self):
        """Read in the table of contents for the ZIP file.

        Fills self.filelist / self.NameToInfo and sets self.comment and
        self.start_dir.  Raises BadZipfile when the end-of-archive record
        cannot be found or a central directory record is malformed.
        """
        fp = self.fp
        try:
            endrec = _EndRecData(fp)
        except IOError:
            raise BadZipfile("File is not a zip file")
        if not endrec:
            raise BadZipfile("File is not a zip file")
        if self.debug > 1:
            print(endrec)
        size_cd = endrec[_ECD_SIZE]             # bytes in central directory
        offset_cd = endrec[_ECD_OFFSET]         # offset of central directory
        self.comment = endrec[_ECD_COMMENT]     # archive comment

        # "concat" is zero, unless zip was concatenated to another file
        concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
        if endrec[_ECD_SIGNATURE] == stringEndArchive64:
            # If Zip64 extension structures are present, account for them
            concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)

        if self.debug > 2:
            inferred = concat + offset_cd
            print("given, inferred, offset %s %s %s"
                  % (offset_cd, inferred, concat))
        # self.start_dir:  Position of start of central directory
        self.start_dir = offset_cd + concat
        fp.seek(self.start_dir, 0)
        data = fp.read(size_cd)
        # Parse the directory from memory rather than from the archive file
        fp = cStringIO.StringIO(data)
        total = 0
        while total < size_cd:
            centdir = fp.read(sizeCentralDir)
            if centdir[0:4] != stringCentralDir:
                raise BadZipfile("Bad magic number for central directory")
            if len(centdir) != sizeCentralDir:
                # A short read would otherwise surface as struct.error,
                # which callers catching BadZipfile do not expect.
                raise BadZipfile("Truncated central directory")
            centdir = struct.unpack(structCentralDir, centdir)
            if self.debug > 2:
                print(centdir)
            filename = fp.read(centdir[_CD_FILENAME_LENGTH])
            # Create ZipInfo instance to store file information
            x = ZipInfo(filename)
            x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
            x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
            x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
            (x.create_version, x.create_system, x.extract_version, x.reserved,
                x.flag_bits, x.compress_type, t, d,
                x.CRC, x.compress_size, x.file_size) = centdir[1:12]
            x.volume, x.internal_attr, x.external_attr = centdir[15:18]
            # Convert date/time code to (year, month, day, hour, min, sec)
            x._raw_time = t
            x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
                                     t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )

            x._decodeExtra()
            x.header_offset = x.header_offset + concat
            x.filename = x._decodeFilename()
            self.filelist.append(x)
            self.NameToInfo[x.filename] = x

            # update total bytes read from central directory
            total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH]
                     + centdir[_CD_EXTRA_FIELD_LENGTH]
                     + centdir[_CD_COMMENT_LENGTH])

            if self.debug > 2:
                print("total %s" % (total,))

    def namelist(self):
        """Return a list of file names in the archive."""
        return [data.filename for data in self.filelist]

    def infolist(self):
        """Return a list of class ZipInfo instances for files in the
        archive."""
        return self.filelist

    def printdir(self):
        """Print a table of contents for the zip file."""
        print("%-46s %19s %12s" % ("File Name", "Modified    ", "Size"))
        for zinfo in self.filelist:
            date = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time[:6]
            print("%-46s %s %12d" % (zinfo.filename, date, zinfo.file_size))

    def testzip(self):
        """Read all the files and check the CRC.

        Returns the name of the first member whose read raises BadZipfile,
        or None when every member reads cleanly.
        """
        chunk_size = 2 ** 20
        for zinfo in self.filelist:
            try:
                # Read by chunks, to avoid an OverflowError or a
                # MemoryError with very large embedded files.
                f = self.open(zinfo.filename, "r")
                try:
                    while f.read(chunk_size):     # Check CRC-32
                        pass
                finally:
                    f.close()
            except BadZipfile:
                return zinfo.filename

    def getinfo(self, name):
        """Return the instance of ZipInfo given 'name'.

        Raises KeyError when the archive has no member of that name.
        """
        info = self.NameToInfo.get(name)
        if info is None:
            raise KeyError(
                'There is no item named %r in the archive' % name)

        return info

    def setpassword(self, pwd):
        """Set default password for encrypted files."""
        self.pwd = pwd

    def read(self, name, pwd=None):
        """Return file bytes (as a string) for name."""
        return self.open(name, "r", pwd).read()

    def open(self, name, mode="r", pwd=None):
        """Return file-like object for 'name'.

        'name' may be a member name or a ZipInfo instance.  'pwd' overrides
        the default password for an encrypted member.  Raises RuntimeError
        for a bad mode, a closed archive, or a missing/wrong password, and
        BadZipfile when the local file header does not match the directory.
        """
        if mode not in ("r", "U", "rU"):
            raise RuntimeError('open() requires mode "r", "U", or "rU"')
        if not self.fp:
            raise RuntimeError(
                "Attempt to read ZIP archive that was already closed")

        # Only open a new file for instances where we were not
        # given a file object in the constructor
        if self._filePassed:
            zef_file = self.fp
        else:
            zef_file = open(self.filename, 'rb')

        # Every failure below must release a handle opened just above;
        # a handle supplied by the caller is never closed here.
        succeeded = False
        try:
            # Make sure we have an info object
            if isinstance(name, ZipInfo):
                # 'name' is already an info object
                zinfo = name
            else:
                # Get info object for name
                zinfo = self.getinfo(name)

            zef_file.seek(zinfo.header_offset, 0)

            # Skip the file header:
            fheader = zef_file.read(sizeFileHeader)
            if fheader[0:4] != stringFileHeader:
                raise BadZipfile("Bad magic number for file header")

            fheader = struct.unpack(structFileHeader, fheader)
            fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
            if fheader[_FH_EXTRA_FIELD_LENGTH]:
                zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])

            if fname != zinfo.orig_filename:
                raise BadZipfile(
                    'File name in directory "%s" and header "%s" differ.' % (
                        zinfo.orig_filename, fname))

            # check for encrypted flag & handle password
            is_encrypted = zinfo.flag_bits & 0x1
            zd = None
            if is_encrypted:
                if not pwd:
                    pwd = self.pwd
                if not pwd:
                    raise RuntimeError("File %s is encrypted, "
                        "password required for extraction" % name)

                zd = _ZipDecrypter(pwd)
                # The first 12 bytes in the cypher stream is an encryption
                # header used to strengthen the algorithm. The first 11
                # bytes are completely random, while the 12th contains the
                # MSB of the CRC, or the MSB of the file time depending on
                # the header type and is used to check the correctness of
                # the password.
                header = zef_file.read(12)
                h = [zd(c) for c in header[0:12]]
                if zinfo.flag_bits & 0x8:
                    # compare against the file type from extended local
                    # headers
                    check_byte = (zinfo._raw_time >> 8) & 0xff
                else:
                    # compare against the CRC otherwise
                    check_byte = (zinfo.CRC >> 24) & 0xff
                if ord(h[11]) != check_byte:
                    raise RuntimeError("Bad password for file", name)

            zef = ZipExtFile(zef_file, mode, zinfo, zd)
            succeeded = True
            return zef
        finally:
            if not succeeded and not self._filePassed:
                zef_file.close()

    def extract(self, member, path=None, pwd=None):
        """Extract a member from the archive to the current working directory,
           using its full name. Its file information is extracted as accurately
           as possible. `member' may be a filename or a ZipInfo object. You can
           specify a different directory using `path'.
        """
        if not isinstance(member, ZipInfo):
            member = self.getinfo(member)

        if path is None:
            path = os.getcwd()

        return self._extract_member(member, path, pwd)

    def extractall(self, path=None, members=None, pwd=None):
        """Extract all members from the archive to the current working
           directory. `path' specifies a different directory to extract to.
           `members' is optional and must be a subset of the list returned
           by namelist().
        """
        if members is None:
            members = self.namelist()

        for zipinfo in members:
            self.extract(zipinfo, path, pwd)

    def _extract_member(self, member, targetpath, pwd):
        """Extract the ZipInfo object 'member' to a physical
           file on the path targetpath.

           Returns the normalized path that was written (or, for a
           directory entry, created).
        """
        # build the destination pathname, replacing
        # forward slashes to platform specific separators.
        # Strip trailing path separator, unless it represents the root.
        if (targetpath[-1:] in (os.path.sep, os.path.altsep)
            and len(os.path.splitdrive(targetpath)[1]) > 1):
            targetpath = targetpath[:-1]

        # NOTE(review): member names come from the archive and ".."
        # components are not filtered here, so a crafted archive can write
        # outside 'targetpath'.  Callers must not extract untrusted
        # archives without inspecting the member names first.

        # don't include leading "/" from file name if present
        if member.filename.startswith('/'):
            targetpath = os.path.join(targetpath, member.filename[1:])
        else:
            targetpath = os.path.join(targetpath, member.filename)

        targetpath = os.path.normpath(targetpath)

        # Create all upper directories if necessary.
        upperdirs = os.path.dirname(targetpath)
        if upperdirs and not os.path.exists(upperdirs):
            os.makedirs(upperdirs)

        # A trailing "/" marks a directory entry: nothing to copy
        if member.filename.endswith('/'):
            if not os.path.isdir(targetpath):
                os.mkdir(targetpath)
            return targetpath

        # Close both ends even when the copy fails part-way through
        source = self.open(member, pwd=pwd)
        try:
            target = open(targetpath, "wb")
            try:
                shutil.copyfileobj(source, target)
            finally:
                target.close()
        finally:
            source.close()

        return targetpath

    def _writecheck(self, zinfo):
        """Check for errors before writing a file to the archive.

        Raises RuntimeError for a wrong mode, a closed archive or an
        unusable compression method, and LargeZipFile when ZIP64 would be
        required but allowZip64 is false.
        """
        if zinfo.filename in self.NameToInfo:
            if self.debug:      # Warning for duplicate names
                print("Duplicate name: %s" % (zinfo.filename,))
        if self.mode not in ("w", "a"):
            raise RuntimeError('write() requires mode "w" or "a"')
        if not self.fp:
            raise RuntimeError(
                "Attempt to write ZIP archive that was already closed")
        if zinfo.compress_type == ZIP_DEFLATED and not zlib:
            raise RuntimeError(
                "Compression requires the (missing) zlib module")
        if zinfo.compress_type not in (ZIP_STORED, ZIP_DEFLATED):
            raise RuntimeError(
                "That compression method is not supported")
        if zinfo.file_size > ZIP64_LIMIT:
            if not self._allowZip64:
                raise LargeZipFile("Filesize would require ZIP64 extensions")
        if zinfo.header_offset > ZIP64_LIMIT:
            if not self._allowZip64:
                raise LargeZipFile("Zipfile size would require ZIP64 extensions")

    def write(self, filename, arcname=None, compress_type=None):
        """Put the bytes from filename into the archive under the name
        arcname.

        'arcname' defaults to 'filename' with any drive and leading path
        separators removed.  'compress_type' overrides the archive-wide
        compression method for this one member.
        """
        if not self.fp:
            raise RuntimeError(
                "Attempt to write to ZIP archive that was already closed")

        st = os.stat(filename)
        isdir = stat.S_ISDIR(st.st_mode)
        mtime = time.localtime(st.st_mtime)
        date_time = mtime[0:6]
        # Create ZipInfo instance to store file information
        if arcname is None:
            arcname = filename
        arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
        while arcname[0] in (os.sep, os.altsep):
            arcname = arcname[1:]
        if isdir:
            arcname += '/'
        zinfo = ZipInfo(arcname, date_time)
        zinfo.external_attr = (st[0] & 0xFFFF) << 16      # Unix attributes
        if compress_type is None:
            zinfo.compress_type = self.compression
        else:
            zinfo.compress_type = compress_type

        zinfo.file_size = st.st_size
        zinfo.flag_bits = 0x00
        zinfo.header_offset = self.fp.tell()    # Start of header bytes

        self._writecheck(zinfo)
        self._didModify = True

        if isdir:
            # Directory entries carry a header only, no data
            zinfo.file_size = 0
            zinfo.compress_size = 0
            zinfo.CRC = 0
            self.filelist.append(zinfo)
            self.NameToInfo[zinfo.filename] = zinfo
            self.fp.write(zinfo.FileHeader())
            return

        with open(filename, "rb") as fp:
            # Must overwrite CRC and sizes with correct data later
            zinfo.CRC = CRC = 0
            zinfo.compress_size = compress_size = 0
            zinfo.file_size = file_size = 0
            self.fp.write(zinfo.FileHeader())
            if zinfo.compress_type == ZIP_DEFLATED:
                cmpr = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                     zlib.DEFLATED, -15)
            else:
                cmpr = None
            while 1:
                buf = fp.read(1024 * 8)
                if not buf:
                    break
                file_size = file_size + len(buf)
                CRC = crc32(buf, CRC) & 0xffffffff
                if cmpr:
                    buf = cmpr.compress(buf)
                    compress_size = compress_size + len(buf)
                self.fp.write(buf)
        if cmpr:
            buf = cmpr.flush()
            compress_size = compress_size + len(buf)
            self.fp.write(buf)
            zinfo.compress_size = compress_size
        else:
            zinfo.compress_size = file_size
        zinfo.CRC = CRC
        zinfo.file_size = file_size
        # Seek backwards and write CRC and file sizes
        position = self.fp.tell()       # Preserve current position in file
        self.fp.seek(zinfo.header_offset + 14, 0)
        self.fp.write(struct.pack("<LLL", zinfo.CRC, zinfo.compress_size,
              zinfo.file_size))
        self.fp.seek(position, 0)
        self.filelist.append(zinfo)
        self.NameToInfo[zinfo.filename] = zinfo

    def writestr(self, zinfo_or_arcname, bytes, compress_type=None):
        """Write a file into the archive.  The contents is the string
        'bytes'.  'zinfo_or_arcname' is either a ZipInfo instance or
        the name of the file in the archive.

        'compress_type', when given, overrides the compression method of
        the member (including one preset on a passed ZipInfo).
        """
        if not isinstance(zinfo_or_arcname, ZipInfo):
            zinfo = ZipInfo(filename=zinfo_or_arcname,
                            date_time=time.localtime(time.time())[:6])

            zinfo.compress_type = self.compression
            zinfo.external_attr = 0o600 << 16
        else:
            zinfo = zinfo_or_arcname

        if not self.fp:
            raise RuntimeError(
                "Attempt to write to ZIP archive that was already closed")

        if compress_type is not None:
            zinfo.compress_type = compress_type

        zinfo.file_size = len(bytes)            # Uncompressed size
        zinfo.header_offset = self.fp.tell()    # Start of header bytes
        self._writecheck(zinfo)
        self._didModify = True
        zinfo.CRC = crc32(bytes) & 0xffffffff       # CRC-32 checksum
        payload = bytes
        if zinfo.compress_type == ZIP_DEFLATED:
            co = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                 zlib.DEFLATED, -15)
            payload = co.compress(bytes) + co.flush()
            zinfo.compress_size = len(payload)    # Compressed size
        else:
            zinfo.compress_size = zinfo.file_size
        self.fp.write(zinfo.FileHeader())
        self.fp.write(payload)
        self.fp.flush()
        if zinfo.flag_bits & 0x08:
            # Write CRC and file sizes after the file data
            self.fp.write(struct.pack("<LLL", zinfo.CRC, zinfo.compress_size,
                  zinfo.file_size))
        self.filelist.append(zinfo)
        self.NameToInfo[zinfo.filename] = zinfo

    def __del__(self):
        """Call the "close()" method in case the user forgot."""
        self.close()

    def close(self):
        """Close the file, and for mode "w" and "a" write the ending
        records.

        The underlying file is released even when writing the ending
        records fails; a file object supplied by the caller is left open.
        """
        if self.fp is None:
            return

        try:
            if self.mode in ("w", "a") and self._didModify: # write ending records
                pos1 = self.fp.tell()
                for zinfo in self.filelist:         # write central directory
                    dt = zinfo.date_time
                    dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
                    dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
                    extra = []
                    if zinfo.file_size > ZIP64_LIMIT \
                            or zinfo.compress_size > ZIP64_LIMIT:
                        # Real sizes go in the ZIP64 extra field; the
                        # 32-bit slots hold the all-ones marker.
                        extra.append(zinfo.file_size)
                        extra.append(zinfo.compress_size)
                        file_size = 0xffffffff
                        compress_size = 0xffffffff
                    else:
                        file_size = zinfo.file_size
                        compress_size = zinfo.compress_size

                    if zinfo.header_offset > ZIP64_LIMIT:
                        extra.append(zinfo.header_offset)
                        header_offset = 0xffffffff
                    else:
                        header_offset = zinfo.header_offset

                    extra_data = zinfo.extra
                    if extra:
                        # Append a ZIP64 field to the extra's
                        extra_data = struct.pack(
                                '<HH' + 'Q'*len(extra),
                                1, 8*len(extra), *extra) + extra_data

                        extract_version = max(45, zinfo.extract_version)
                        create_version = max(45, zinfo.create_version)
                    else:
                        extract_version = zinfo.extract_version
                        create_version = zinfo.create_version

                    try:
                        filename, flag_bits = zinfo._encodeFilenameFlags()
                        centdir = struct.pack(structCentralDir,
                            stringCentralDir, create_version,
                            zinfo.create_system, extract_version, zinfo.reserved,
                            flag_bits, zinfo.compress_type, dostime, dosdate,
                            zinfo.CRC, compress_size, file_size,
                            len(filename), len(extra_data), len(zinfo.comment),
                            0, zinfo.internal_attr, zinfo.external_attr,
                            header_offset)
                    except DeprecationWarning:
                        # Dump the values being packed to help diagnose
                        # the failure, then let it propagate.
                        sys.stderr.write("%s\n" % ((structCentralDir,
                            stringCentralDir, create_version,
                            zinfo.create_system, extract_version, zinfo.reserved,
                            zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
                            zinfo.CRC, compress_size, file_size,
                            len(zinfo.filename), len(extra_data), len(zinfo.comment),
                            0, zinfo.internal_attr, zinfo.external_attr,
                            header_offset),))
                        raise
                    self.fp.write(centdir)
                    self.fp.write(filename)
                    self.fp.write(extra_data)
                    self.fp.write(zinfo.comment)

                pos2 = self.fp.tell()
                # Write end-of-zip-archive record
                centDirCount = len(self.filelist)
                centDirSize = pos2 - pos1
                centDirOffset = pos1
                if (centDirCount >= ZIP_FILECOUNT_LIMIT or
                    centDirOffset > ZIP64_LIMIT or
                    centDirSize > ZIP64_LIMIT):
                    # Need to write the ZIP64 end-of-archive records
                    zip64endrec = struct.pack(
                            structEndArchive64, stringEndArchive64,
                            44, 45, 45, 0, 0, centDirCount, centDirCount,
                            centDirSize, centDirOffset)
                    self.fp.write(zip64endrec)

                    zip64locrec = struct.pack(
                            structEndArchive64Locator,
                            stringEndArchive64Locator, 0, pos2, 1)
                    self.fp.write(zip64locrec)
                    # Clamp to what the classic end record can hold
                    centDirCount = min(centDirCount, 0xFFFF)
                    centDirSize = min(centDirSize, 0xFFFFFFFF)
                    centDirOffset = min(centDirOffset, 0xFFFFFFFF)

                # check for valid comment length
                if len(self.comment) > ZIP_MAX_COMMENT:
                    if self.debug > 0:
                        msg = 'Archive comment is too long; truncating to %d bytes' \
                              % ZIP_MAX_COMMENT
                        print(msg)
                    self.comment = self.comment[:ZIP_MAX_COMMENT]

                endrec = struct.pack(structEndArchive, stringEndArchive,
                                     0, 0, centDirCount, centDirCount,
                                     centDirSize, centDirOffset, len(self.comment))
                self.fp.write(endrec)
                self.fp.write(self.comment)
                self.fp.flush()
        finally:
            # Drop our reference first so a failing close() cannot leave a
            # half-closed archive that __del__ would try to finish again.
            fp = self.fp
            self.fp = None
            if not self._filePassed:
                fp.close()
1263
1264
class PyZipFile(ZipFile):
    """Class to create ZIP archives with Python library files and packages."""

    def writepy(self, pathname, basename=""):
        """Add all files from "pathname" to the ZIP archive.

        If pathname is a package directory, search the directory and
        all package subdirectories recursively for all *.py and enter
        the modules into the archive.  If pathname is a plain
        directory, listdir *.py and enter all modules.  Else, pathname
        must be a Python *.py file and the module will be put into the
        archive.  Added modules are always module.pyo or module.pyc.
        This method will compile the module.py into module.pyc if
        necessary.

        basename, when non-empty, is the archive directory under which
        the entries are stored ("basename/module.pyc").

        Raises RuntimeError when pathname is not a directory and does
        not end with ".py".
        """
        if not os.path.isdir(pathname):
            # Single module: only *.py sources are accepted.
            if pathname[-3:] != ".py":
                raise RuntimeError(
                    'Files added with writepy() must end with ".py"')
            self._write_module(pathname[:-3], basename, "Adding file")
            return

        initname = os.path.join(pathname, "__init__.py")
        if os.path.isfile(initname):
            # This is a package directory: its own name becomes part of
            # the archive prefix for everything found below it.
            name = os.path.split(pathname)[1]
            if basename:
                basename = "%s/%s" % (basename, name)
            else:
                basename = name
            if self.debug:
                print("Adding package in %s as %s" % (pathname, basename))
            self._write_module(initname[:-3], basename, "Adding")
            dirlist = os.listdir(pathname)
            # __init__.py has just been written; do not add it twice.
            dirlist.remove("__init__.py")
            # Add all *.py files and package subdirectories
            for filename in dirlist:
                path = os.path.join(pathname, filename)
                if os.path.isdir(path):
                    # Only sub-directories that are packages are followed.
                    if os.path.isfile(os.path.join(path, "__init__.py")):
                        self.writepy(path, basename)  # Recursive call
                elif os.path.splitext(filename)[1] == ".py":
                    self._write_module(path[:-3], basename, "Adding")
        else:
            # This is NOT a package directory, add its files at top level
            if self.debug:
                print("Adding files from directory %s" % pathname)
            for filename in os.listdir(pathname):
                if os.path.splitext(filename)[1] == ".py":
                    path = os.path.join(pathname, filename)
                    self._write_module(path[:-3], basename, "Adding")

    def _write_module(self, module_path, basename, label):
        """Store the compiled form of one module in the archive.

        module_path is the source path without its ".py" suffix; label is
        the prefix of the message printed when self.debug is set.
        """
        fname, arcname = self._get_codename(module_path, basename)
        if self.debug:
            print("%s %s" % (label, arcname))
        self.write(fname, arcname)

    def _get_codename(self, pathname, basename):
        """Return (filename, archivename) for the path.

        Given a module name path, return the correct file path and
        archive name, compiling if necessary.  For example, given
        /python/lib/string, return (/python/lib/string.pyc, string.pyc).
        The archive name is prefixed with "basename/" when basename is
        non-empty.
        """
        file_py = pathname + ".py"
        file_pyc = pathname + ".pyc"
        file_pyo = pathname + ".pyo"
        if os.path.isfile(file_pyo) and \
                os.stat(file_pyo).st_mtime >= os.stat(file_py).st_mtime:
            fname = file_pyo    # Use .pyo file
        else:
            if not os.path.isfile(file_pyc) or \
                    os.stat(file_pyc).st_mtime < os.stat(file_py).st_mtime:
                # The .pyc is missing or older than the source: rebuild it.
                import py_compile
                if self.debug:
                    print("Compiling %s" % file_py)
                try:
                    py_compile.compile(file_py, file_pyc, None, True)
                except py_compile.PyCompileError as err:
                    # The error is only reported; the .pyc path is still
                    # returned to the caller.
                    print(err.msg)
            fname = file_pyc
        archivename = os.path.split(fname)[1]
        if basename:
            archivename = "%s/%s" % (basename, archivename)
        return (fname, archivename)
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001362
1363
def main(args=None):
    """Command-line interface: list, test, extract or create a ZIP file.

    args is the argument list without the program name; when None,
    sys.argv[1:] is used.  The first argument selects the action
    (-l, -t, -e or -c).  On an unknown action or a wrong number of
    arguments the usage text is printed and sys.exit(1) is called.
    The archive is closed on every path, including when an action fails.
    """
    import textwrap
    USAGE = textwrap.dedent("""\
        Usage:
            zipfile.py -l zipfile.zip        # Show listing of a zipfile
            zipfile.py -t zipfile.zip        # Test if a zipfile is valid
            zipfile.py -e zipfile.zip target # Extract zipfile into target dir
            zipfile.py -c zipfile.zip src ... # Create zipfile from sources
        """)
    if args is None:
        args = sys.argv[1:]

    if not args or args[0] not in ('-l', '-c', '-e', '-t'):
        print(USAGE)
        sys.exit(1)

    if args[0] == '-l':
        if len(args) != 2:
            print(USAGE)
            sys.exit(1)
        zf = ZipFile(args[1], 'r')
        try:
            zf.printdir()
        finally:
            zf.close()

    elif args[0] == '-t':
        if len(args) != 2:
            print(USAGE)
            sys.exit(1)
        zf = ZipFile(args[1], 'r')
        try:
            badfile = zf.testzip()
        finally:
            # The archive used to be left open on this branch.
            zf.close()
        if badfile:
            print("The following enclosed file is corrupted: {!r}".format(badfile))
        print("Done testing")

    elif args[0] == '-e':
        if len(args) != 3:
            print(USAGE)
            sys.exit(1)

        zf = ZipFile(args[1], 'r')
        try:
            out = args[2]
            # NOTE(review): member names are joined to the target as-is, so
            # absolute names or ".." components can land outside of it.
            for path in zf.namelist():
                if path.startswith('./'):
                    tgt = os.path.join(out, path[2:])
                else:
                    tgt = os.path.join(out, path)

                if path.endswith('/'):
                    # Directory entry: create the directory instead of
                    # trying to open it as a file.
                    if tgt and not os.path.isdir(tgt):
                        os.makedirs(tgt)
                    continue

                tgtdir = os.path.dirname(tgt)
                # An empty dirname means the current directory, which
                # os.makedirs() cannot be asked to create.
                if tgtdir and not os.path.exists(tgtdir):
                    os.makedirs(tgtdir)
                with open(tgt, 'wb') as fp:
                    fp.write(zf.read(path))
        finally:
            zf.close()

    elif args[0] == '-c':
        if len(args) < 3:
            print(USAGE)
            sys.exit(1)

        def addToZip(zf, path, zippath):
            # Files are stored deflated; directories are walked recursively.
            if os.path.isfile(path):
                zf.write(path, zippath, ZIP_DEFLATED)
            elif os.path.isdir(path):
                for nm in os.listdir(path):
                    addToZip(zf,
                             os.path.join(path, nm), os.path.join(zippath, nm))
            # else: ignore

        zf = ZipFile(args[1], 'w', allowZip64=True)
        try:
            for src in args[2:]:
                addToZip(zf, src, os.path.basename(src))
        finally:
            zf.close()
1437
# Run the command-line interface when this module is executed as a script.
if __name__ == "__main__":
    main()