blob: f876f428ae49252f33336136bd0eb7b79f62ccdb [file] [log] [blame]
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001"""
2Read and write ZIP files.
3"""
Georg Brandl62416bc2008-01-07 18:47:44 +00004import struct, os, time, sys, shutil
Martin v. Löwis0dfcfc82009-01-24 14:00:33 +00005import binascii, cStringIO, stat
Antoine Pitrou94c33eb2010-01-27 20:59:50 +00006import io
7import re
Guido van Rossum32abe6f2000-03-31 17:30:02 +00008
try:
    import zlib  # We may need its compression method
    crc32 = zlib.crc32
except ImportError:
    # zlib is optional: without it only the binascii CRC is available and
    # ``zlib`` is left as None so callers can test for its presence.
    zlib = None
    crc32 = binascii.crc32
Guido van Rossum32abe6f2000-03-31 17:30:02 +000015
# Names exported by ``from zipfile import *``.
__all__ = ["BadZipfile", "error", "ZIP_STORED", "ZIP_DEFLATED", "is_zipfile",
           "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile"]
Skip Montanaro40fc1602001-03-01 04:27:19 +000018
class BadZipfile(Exception):
    """The exception raised by this module for archives it cannot process."""
    pass
Ronald Oussoren143cefb2006-06-15 08:14:18 +000021
22
class LargeZipFile(Exception):
    """
    Raised when writing a zipfile, the zipfile requires ZIP64 extensions
    and those extensions are disabled.
    """
28
error = BadZipfile      # The exception raised by this module
Guido van Rossum32abe6f2000-03-31 17:30:02 +000030
# Sizes above ZIP64_LIMIT are written through the ZIP64 extension.
ZIP64_LIMIT = (1 << 31) - 1
ZIP_FILECOUNT_LIMIT = 1 << 16
ZIP_MAX_COMMENT = (1 << 16) - 1

# constants for Zip file compression methods
ZIP_STORED = 0
ZIP_DEFLATED = 8
# Other ZIP compression methods not supported
39
Martin v. Löwis8c436412008-07-03 12:51:14 +000040# Below are some formats and associated data for reading/writing headers using
41# the struct module. The names and structures of headers/records are those used
42# in the PKWARE description of the ZIP file format:
43# http://www.pkware.com/documents/casestudies/APPNOTE.TXT
44# (URL valid as of January 2008)
Ronald Oussoren143cefb2006-06-15 08:14:18 +000045
# The "end of central directory" structure, magic number, size, and indices
# (section V.I in the format document)
structEndArchive = "<4s4H2LH"
stringEndArchive = "PK\005\006"
sizeEndCentDir = struct.calcsize(structEndArchive)

_ECD_SIGNATURE = 0
_ECD_DISK_NUMBER = 1
_ECD_DISK_START = 2
_ECD_ENTRIES_THIS_DISK = 3
_ECD_ENTRIES_TOTAL = 4
_ECD_SIZE = 5
_ECD_OFFSET = 6
_ECD_COMMENT_SIZE = 7
# These last two indices are not part of the structure as defined in the
# spec, but they are used internally by this module as a convenience
_ECD_COMMENT = 8
_ECD_LOCATION = 9
64
# The "central directory" structure, magic number, size, and indices
# of entries in the structure (section V.F in the format document)
structCentralDir = "<4s4B4HL2L5H2L"
stringCentralDir = "PK\001\002"
sizeCentralDir = struct.calcsize(structCentralDir)

# indexes of entries in the central directory structure
_CD_SIGNATURE = 0
_CD_CREATE_VERSION = 1
_CD_CREATE_SYSTEM = 2
_CD_EXTRACT_VERSION = 3
_CD_EXTRACT_SYSTEM = 4
_CD_FLAG_BITS = 5
_CD_COMPRESS_TYPE = 6
_CD_TIME = 7
_CD_DATE = 8
_CD_CRC = 9
_CD_COMPRESSED_SIZE = 10
_CD_UNCOMPRESSED_SIZE = 11
_CD_FILENAME_LENGTH = 12
_CD_EXTRA_FIELD_LENGTH = 13
_CD_COMMENT_LENGTH = 14
_CD_DISK_NUMBER_START = 15
_CD_INTERNAL_FILE_ATTRIBUTES = 16
_CD_EXTERNAL_FILE_ATTRIBUTES = 17
_CD_LOCAL_HEADER_OFFSET = 18
91
# The "local file header" structure, magic number, size, and indices
# (section V.A in the format document)
structFileHeader = "<4s2B4HL2L2H"
stringFileHeader = "PK\003\004"
sizeFileHeader = struct.calcsize(structFileHeader)

_FH_SIGNATURE = 0
_FH_EXTRACT_VERSION = 1
_FH_EXTRACT_SYSTEM = 2
_FH_GENERAL_PURPOSE_FLAG_BITS = 3
_FH_COMPRESSION_METHOD = 4
_FH_LAST_MOD_TIME = 5
_FH_LAST_MOD_DATE = 6
_FH_CRC = 7
_FH_COMPRESSED_SIZE = 8
_FH_UNCOMPRESSED_SIZE = 9
_FH_FILENAME_LENGTH = 10
_FH_EXTRA_FIELD_LENGTH = 11
110
# The "Zip64 end of central directory locator" structure, magic number, and size
structEndArchive64Locator = "<4sLQL"
stringEndArchive64Locator = "PK\x06\x07"
sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator)
Martin v. Löwis8c436412008-07-03 12:51:14 +0000115
# The "Zip64 end of central directory" record, magic number, size, and indices
# (section V.G in the format document)
structEndArchive64 = "<4sQ2H2L4Q"
stringEndArchive64 = "PK\x06\x06"
sizeEndCentDir64 = struct.calcsize(structEndArchive64)

_CD64_SIGNATURE = 0
_CD64_DIRECTORY_RECSIZE = 1
_CD64_CREATE_VERSION = 2
_CD64_EXTRACT_VERSION = 3
_CD64_DISK_NUMBER = 4
_CD64_DISK_NUMBER_START = 5
_CD64_NUMBER_ENTRIES_THIS_DISK = 6
_CD64_NUMBER_ENTRIES_TOTAL = 7
_CD64_DIRECTORY_SIZE = 8
_CD64_OFFSET_START_CENTDIR = 9
132
def _check_zipfile(fp):
    """Return True if an "end of central directory" record is found in *fp*.

    *fp* is a seekable file-like object.  An IOError raised while probing
    it is treated as "not a ZIP file" and yields False.
    """
    try:
        if _EndRecData(fp):
            return True         # file has correct magic number
    except IOError:
        pass
    return False
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000140
def is_zipfile(filename):
    """Quickly see if a file is a ZIP file by checking the magic number.

    The filename argument may be a file or file-like object too.
    Returns False (rather than raising) when the file cannot be opened
    or read.
    """
    result = False
    try:
        if hasattr(filename, "read"):
            # Already a file-like object: probe it directly and leave it
            # open for the caller.
            result = _check_zipfile(fp=filename)
        else:
            with open(filename, "rb") as fp:
                result = _check_zipfile(fp)
    except IOError:
        pass
    return result
156
def _EndRecData64(fpin, offset, endrec):
    """
    Read the ZIP64 end-of-archive records and use that to update endrec

    *offset* is the position of the regular end-of-archive record relative
    to the end of the file (a negative number).  *endrec* is returned
    unchanged when no ZIP64 locator/record is found in front of it;
    otherwise its fields are overwritten with the ZIP64 values.

    Raises BadZipfile for archives that span multiple disks.
    """
    try:
        fpin.seek(offset - sizeEndCentDir64Locator, 2)
    except IOError:
        # If the seek fails, the file is not large enough to contain a ZIP64
        # end-of-archive record, so just return the end record we were given.
        return endrec

    data = fpin.read(sizeEndCentDir64Locator)
    if len(data) != sizeEndCentDir64Locator:
        # Short read: there is no room for a locator, so no ZIP64 data.
        return endrec
    sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, data)
    if sig != stringEndArchive64Locator:
        return endrec

    if diskno != 0 or disks != 1:
        raise BadZipfile("zipfiles that span multiple disks are not supported")

    # Assume no 'zip64 extensible data'
    fpin.seek(offset - sizeEndCentDir64Locator - sizeEndCentDir64, 2)
    data = fpin.read(sizeEndCentDir64)
    if len(data) != sizeEndCentDir64:
        # Short read: the ZIP64 record is truncated, keep the plain record.
        return endrec
    sig, sz, create_version, read_version, disk_num, disk_dir, \
            dircount, dircount2, dirsize, diroffset = \
            struct.unpack(structEndArchive64, data)
    if sig != stringEndArchive64:
        return endrec

    # Update the original endrec using data from the ZIP64 record
    endrec[_ECD_SIGNATURE] = sig
    endrec[_ECD_DISK_NUMBER] = disk_num
    endrec[_ECD_DISK_START] = disk_dir
    endrec[_ECD_ENTRIES_THIS_DISK] = dircount
    endrec[_ECD_ENTRIES_TOTAL] = dircount2
    endrec[_ECD_SIZE] = dirsize
    endrec[_ECD_OFFSET] = diroffset
    return endrec
194
195
def _EndRecData(fpin):
    """Return data from the "End of Central Directory" record, or None.

    The data is a list of the nine items in the ZIP "End of central dir"
    record followed by a tenth item, the file seek offset of this record.

    None is returned when the file is too small, when no record signature
    is found, or when the record found is truncated."""

    # Determine file size
    fpin.seek(0, 2)
    filesize = fpin.tell()

    # Check to see if this is ZIP file with no archive comment (the
    # "end of central directory" structure should be the last item in the
    # file if this is the case).
    try:
        fpin.seek(-sizeEndCentDir, 2)
    except IOError:
        return None
    data = fpin.read()
    if (len(data) == sizeEndCentDir and
            data[0:4] == stringEndArchive and
            data[-2:] == "\000\000"):
        # the signature is correct and there's no comment, unpack structure
        endrec = list(struct.unpack(structEndArchive, data))

        # Append a blank comment and record start offset
        endrec.append("")
        endrec.append(filesize - sizeEndCentDir)

        # Try to read the "Zip64 end of central directory" structure
        return _EndRecData64(fpin, -sizeEndCentDir, endrec)

    # Either this is not a ZIP file, or it is a ZIP file with an archive
    # comment.  Search the end of the file for the "end of central directory"
    # record signature. The comment is the last item in the ZIP file and may be
    # up to 64K long.  It is assumed that the "end of central directory" magic
    # number does not appear in the comment.
    maxCommentStart = max(filesize - (1 << 16) - sizeEndCentDir, 0)
    fpin.seek(maxCommentStart, 0)
    data = fpin.read()
    start = data.rfind(stringEndArchive)
    if start >= 0:
        # found the magic number; attempt to unpack and interpret
        recData = data[start:start+sizeEndCentDir]
        if len(recData) != sizeEndCentDir:
            # The record is cut off by the end of the file: corrupt archive.
            return None
        endrec = list(struct.unpack(structEndArchive, recData))
        commentSize = endrec[_ECD_COMMENT_SIZE]  # as claimed by the zip file
        comment = data[start+sizeEndCentDir:start+sizeEndCentDir+commentSize]
        endrec.append(comment)
        endrec.append(maxCommentStart + start)

        # Try to read the "Zip64 end of central directory" structure
        return _EndRecData64(fpin, maxCommentStart + start - filesize,
                             endrec)

    # Unable to find a valid end of central directory structure
    return None
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000250
Fred Drake484d7352000-10-02 21:14:52 +0000251
class ZipInfo (object):
    """Class with attributes describing each file in the ZIP archive."""

    __slots__ = (
            'orig_filename',
            'filename',
            'date_time',
            'compress_type',
            'comment',
            'extra',
            'create_system',
            'create_version',
            'extract_version',
            'reserved',
            'flag_bits',
            'volume',
            'internal_attr',
            'external_attr',
            'header_offset',
            'CRC',
            'compress_size',
            'file_size',
            '_raw_time',
        )

    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
        """Create an entry named *filename* with modification *date_time*.

        *date_time* is a (year, month, day, hour, min, sec) tuple.
        """
        self.orig_filename = filename   # Original file name in archive

        # Terminate the file name at the first null byte.  Null bytes in file
        # names are used as tricks by viruses in archives.
        null_byte = filename.find(chr(0))
        if null_byte >= 0:
            filename = filename[0:null_byte]
        # This is used to ensure paths in generated ZIP files always use
        # forward slashes as the directory separator, as required by the
        # ZIP format specification.
        if os.sep != "/" and os.sep in filename:
            filename = filename.replace(os.sep, "/")

        self.filename = filename        # Normalized file name
        self.date_time = date_time      # year, month, day, hour, min, sec
        # Standard values:
        self.compress_type = ZIP_STORED # Type of compression for the file
        self.comment = ""               # Comment for each file
        self.extra = ""                 # ZIP extra data
        if sys.platform == 'win32':
            self.create_system = 0          # System which created ZIP archive
        else:
            # Assume everything else is unix-y
            self.create_system = 3          # System which created ZIP archive
        self.create_version = 20        # Version which created ZIP archive
        self.extract_version = 20       # Version needed to extract archive
        self.reserved = 0               # Must be zero
        self.flag_bits = 0              # ZIP flag bits
        self.volume = 0                 # Volume number of file header
        self.internal_attr = 0          # Internal attributes
        self.external_attr = 0          # External file attributes
        # Other attributes are set by class ZipFile:
        # header_offset         Byte offset to the file header
        # CRC                   CRC-32 of the uncompressed file
        # compress_size         Size of the compressed file
        # file_size             Size of the uncompressed file

    def FileHeader(self):
        """Return the per-file header as a string.

        When either size exceeds ZIP64_LIMIT a ZIP64 extra record is
        appended to the returned header and, as a side effect,
        extract_version and create_version are raised to at least 45.
        """
        dt = self.date_time
        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
        if self.flag_bits & 0x08:
            # Set these to zero because we write them after the file data
            CRC = compress_size = file_size = 0
        else:
            CRC = self.CRC
            compress_size = self.compress_size
            file_size = self.file_size

        extra = self.extra

        if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
            # File is larger than what fits into a 4 byte integer,
            # fall back to the ZIP64 extension
            fmt = '<HHQQ'
            extra = extra + struct.pack(fmt,
                    1, struct.calcsize(fmt)-4, file_size, compress_size)
            file_size = 0xffffffff
            compress_size = 0xffffffff
            self.extract_version = max(45, self.extract_version)
            # Raise create_version from its own value, so that a higher
            # create_version is never lowered to extract_version.
            self.create_version = max(45, self.create_version)

        filename, flag_bits = self._encodeFilenameFlags()
        header = struct.pack(structFileHeader, stringFileHeader,
                 self.extract_version, self.reserved, flag_bits,
                 self.compress_type, dostime, dosdate, CRC,
                 compress_size, file_size,
                 len(filename), len(extra))
        return header + filename + extra

    def _encodeFilenameFlags(self):
        """Return (encoded filename, flag bits) for writing a header.

        Unicode names are stored as ASCII when possible; otherwise they are
        encoded as UTF-8 and flag bit 0x800 is set in the returned flags.
        """
        if isinstance(self.filename, unicode):
            try:
                return self.filename.encode('ascii'), self.flag_bits
            except UnicodeEncodeError:
                return self.filename.encode('utf-8'), self.flag_bits | 0x800
        else:
            return self.filename, self.flag_bits

    def _decodeFilename(self):
        """Return the filename, decoded from UTF-8 if flag bit 0x800 is set."""
        if self.flag_bits & 0x800:
            return self.filename.decode('utf-8')
        else:
            return self.filename

    def _decodeExtra(self):
        """Apply ZIP64 values found in the extra field to this entry.

        Each extra record is a (type, length) header followed by *length*
        bytes.  For the ZIP64 record (type 1) the 64-bit values replace, in
        order, file_size, compress_size and header_offset wherever the
        attribute holds the 0xffffffff placeholder.

        Raises RuntimeError if a ZIP64 record has an unexpected length.
        """
        # Try to decode the extra field.
        extra = self.extra
        unpack = struct.unpack
        # A record header needs 4 bytes; shorter trailing data is ignored
        # instead of being handed to struct.unpack.
        while len(extra) >= 4:
            tp, ln = unpack('<HH', extra[:4])
            if tp == 1:
                if ln >= 24:
                    counts = unpack('<QQQ', extra[4:28])
                elif ln == 16:
                    counts = unpack('<QQ', extra[4:20])
                elif ln == 8:
                    counts = unpack('<Q', extra[4:12])
                elif ln == 0:
                    counts = ()
                else:
                    raise RuntimeError("Corrupt extra field %s"%(ln,))

                idx = 0

                # ZIP64 extension (large files and/or large archives)
                if self.file_size in (0xffffffffffffffff, 0xffffffff):
                    self.file_size = counts[idx]
                    idx += 1

                if self.compress_size == 0xFFFFFFFF:
                    self.compress_size = counts[idx]
                    idx += 1

                if self.header_offset == 0xffffffff:
                    self.header_offset = counts[idx]
                    idx += 1

            extra = extra[ln+4:]
Tim Petersa608bb22006-06-15 18:06:29 +0000399
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000400
class _ZipDecrypter:
    """Class to handle decryption of files stored within a ZIP archive.

    ZIP supports a password-based form of encryption. Even though known
    plaintext attacks have been found against it, it is still useful
    to be able to get data out of such a file.

    Usage:
        zd = _ZipDecrypter(mypwd)
        plain_char = zd(cypher_char)
        plain_text = map(zd, cypher_text)
    """

    def _GenerateCRCTable():
        """Generate a CRC-32 table.

        ZIP encryption uses the CRC32 one-byte primitive for scrambling some
        internal keys. We noticed that a direct implementation is faster than
        relying on binascii.crc32().
        """
        poly = 0xedb88320
        table = [0] * 256
        for i in range(256):
            crc = i
            for j in range(8):
                if crc & 1:
                    crc = ((crc >> 1) & 0x7FFFFFFF) ^ poly
                else:
                    crc = ((crc >> 1) & 0x7FFFFFFF)
            table[i] = crc
        return table
    # Built once, while the class body executes, and shared by all instances.
    crctable = _GenerateCRCTable()

    def _crc32(self, ch, crc):
        """Compute the CRC32 primitive on one byte."""
        return ((crc >> 8) & 0xffffff) ^ self.crctable[(crc ^ ord(ch)) & 0xff]

    def __init__(self, pwd):
        """Initialise the three keys and mix in each character of *pwd*."""
        self.key0 = 305419896       # 0x12345678
        self.key1 = 591751049       # 0x23456789
        self.key2 = 878082192       # 0x34567890
        for p in pwd:
            self._UpdateKeys(p)

    def _UpdateKeys(self, c):
        """Advance the three keys with the single character *c*."""
        self.key0 = self._crc32(c, self.key0)
        # 4294967295 == 0xffffffff keeps key1 within 32 bits.
        self.key1 = (self.key1 + (self.key0 & 255)) & 4294967295
        self.key1 = (self.key1 * 134775813 + 1) & 4294967295
        self.key2 = self._crc32(chr((self.key1 >> 24) & 255), self.key2)

    def __call__(self, c):
        """Decrypt a single character."""
        c = ord(c)
        k = self.key2 | 2
        c = c ^ (((k * (k^1)) >> 8) & 255)
        c = chr(c)
        # The keys are updated with the decrypted character.
        self._UpdateKeys(c)
        return c
459
class ZipExtFile(io.BufferedIOBase):
    """File-like object for reading an archive member.
       Is returned by ZipFile.open().
    """

    # Max size supported by decompressor.
    # Parenthesised: "1 << 31 - 1" would evaluate to 1 << 30.
    MAX_N = (1 << 31) - 1

    # Read from compressed files in 4k blocks.
    MIN_READ_SIZE = 4096

    # Search for universal newlines or line chunks.
    PATTERN = re.compile(r'^(?P<chunk>[^\r\n]+)|(?P<newline>\n|\r\n?)')

    def __init__(self, fileobj, mode, zipinfo, decrypter=None):
        """Wrap *fileobj*, positioned at the member's data, for reading.

        *zipinfo* supplies the compression type, compressed size, name and
        (if present) the expected CRC.  *decrypter*, when given, is called
        on every character read from *fileobj*.
        """
        self._fileobj = fileobj
        self._decrypter = decrypter

        self._compress_type = zipinfo.compress_type
        self._compress_size = zipinfo.compress_size
        self._compress_left = zipinfo.compress_size

        if self._compress_type == ZIP_DEFLATED:
            self._decompressor = zlib.decompressobj(-15)
        self._unconsumed = ''

        self._readbuffer = ''
        self._offset = 0

        self._universal = 'U' in mode
        self.newlines = None

        # Adjust read size for encrypted files since the first 12 bytes
        # are for the encryption/password information.
        if self._decrypter is not None:
            self._compress_left -= 12

        self.mode = mode
        self.name = zipinfo.filename

        if hasattr(zipinfo, 'CRC'):
            self._expected_crc = zipinfo.CRC
            self._running_crc = crc32(b'') & 0xffffffff
        else:
            self._expected_crc = None

    def readline(self, limit=-1):
        """Read and return a line from the stream.

        If limit is specified, at most limit bytes will be read.
        """

        if not self._universal and limit < 0:
            # Shortcut common case - newline found in buffer.
            i = self._readbuffer.find('\n', self._offset) + 1
            if i > 0:
                line = self._readbuffer[self._offset: i]
                self._offset = i
                return line

        if not self._universal:
            return io.BufferedIOBase.readline(self, limit)

        line = ''
        while limit < 0 or len(line) < limit:
            readahead = self.peek(2)
            if readahead == '':
                return line

            #
            # Search for universal newlines or line chunks.
            #
            # The pattern returns either a line chunk or a newline, but not
            # both. Combined with peek(2), we are assured that the sequence
            # '\r\n' is always retrieved completely and never split into
            # separate newlines - '\r', '\n' due to coincidental readaheads.
            #
            match = self.PATTERN.search(readahead)
            newline = match.group('newline')
            if newline is not None:
                if self.newlines is None:
                    self.newlines = []
                if newline not in self.newlines:
                    self.newlines.append(newline)
                self._offset += len(newline)
                return line + '\n'

            chunk = match.group('chunk')
            if limit >= 0:
                chunk = chunk[: limit - len(line)]

            self._offset += len(chunk)
            line += chunk

        return line

    def peek(self, n=1):
        """Returns buffered bytes without advancing the position."""
        if n > len(self._readbuffer) - self._offset:
            chunk = self.read(n)
            if len(chunk) > self._offset:
                # read() refilled the buffer and dropped part of what it
                # returned; put the chunk back in front so the position is
                # restored instead of letting _offset go negative.
                self._readbuffer = chunk + self._readbuffer[self._offset:]
                self._offset = 0
            else:
                self._offset -= len(chunk)

        # Return up to 512 bytes to reduce allocation overhead for tight loops.
        return self._readbuffer[self._offset: self._offset + 512]

    def readable(self):
        """Return True: the object supports reading."""
        return True

    def read(self, n=-1):
        """Read and return up to n bytes.
        If the argument is omitted, None, or negative, data is read and returned until EOF is reached.
        """
        buf = ''
        if n is None:
            n = -1
        while True:
            if n < 0:
                data = self.read1(n)
            elif n > len(buf):
                data = self.read1(n - len(buf))
            else:
                return buf
            if len(data) == 0:
                return buf
            buf += data

    def _update_crc(self, newdata, eof):
        """Fold *newdata* into the running CRC; verify it when *eof* is true.

        Raises BadZipfile if the final CRC differs from the expected one.
        """
        # Update the CRC using the given data.
        if self._expected_crc is None:
            # No need to compute the CRC if we don't have a reference value
            return
        self._running_crc = crc32(newdata, self._running_crc) & 0xffffffff
        # Check the CRC if we're at the end of the file
        if eof and self._running_crc != self._expected_crc:
            raise BadZipfile("Bad CRC-32 for file %r" % self.name)

    def read1(self, n):
        """Read up to n bytes with at most one read() system call."""

        # Simplify algorithm (branching) by transforming negative n to large n.
        # None is tested first so it is never compared with an integer.
        if n is None or n < 0:
            n = self.MAX_N

        # Bytes available in read buffer.
        len_readbuffer = len(self._readbuffer) - self._offset

        # Read from file.
        if self._compress_left > 0 and n > len_readbuffer + len(self._unconsumed):
            nbytes = n - len_readbuffer - len(self._unconsumed)
            nbytes = max(nbytes, self.MIN_READ_SIZE)
            nbytes = min(nbytes, self._compress_left)

            data = self._fileobj.read(nbytes)
            self._compress_left -= len(data)

            if data and self._decrypter is not None:
                data = ''.join(map(self._decrypter, data))

            if self._compress_type == ZIP_STORED:
                self._update_crc(data, eof=(self._compress_left==0))
                self._readbuffer = self._readbuffer[self._offset:] + data
                self._offset = 0
            else:
                # Prepare deflated bytes for decompression.
                self._unconsumed += data

        # Handle unconsumed data.
        if (len(self._unconsumed) > 0 and n > len_readbuffer and
            self._compress_type == ZIP_DEFLATED):
            data = self._decompressor.decompress(
                self._unconsumed,
                max(n - len_readbuffer, self.MIN_READ_SIZE)
            )

            self._unconsumed = self._decompressor.unconsumed_tail
            eof = len(self._unconsumed) == 0 and self._compress_left == 0
            if eof:
                data += self._decompressor.flush()

            self._update_crc(data, eof=eof)
            self._readbuffer = self._readbuffer[self._offset:] + data
            self._offset = 0

        # Read from buffer.
        data = self._readbuffer[self._offset: self._offset + n]
        self._offset += len(data)
        return data
647
Tim Petersea5962f2007-03-12 18:07:52 +0000648
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000649
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000650class ZipFile:
Tim Petersa19a1682001-03-29 04:36:09 +0000651 """ Class with methods to open, read, write, close, list zip files.
652
Martin v. Löwis8c436412008-07-03 12:51:14 +0000653 z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=False)
Tim Petersa19a1682001-03-29 04:36:09 +0000654
Fred Drake3d9091e2001-03-26 15:49:24 +0000655 file: Either the path to the file, or a file-like object.
656 If it is a path, the file will be opened and closed by ZipFile.
657 mode: The mode can be either read "r", write "w" or append "a".
658 compression: ZIP_STORED (no compression) or ZIP_DEFLATED (requires zlib).
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000659 allowZip64: if True ZipFile will create files with ZIP64 extensions when
660 needed, otherwise it will raise an exception when this would
661 be necessary.
662
Fred Drake3d9091e2001-03-26 15:49:24 +0000663 """
Fred Drake484d7352000-10-02 21:14:52 +0000664
Fred Drake90eac282001-02-28 05:29:34 +0000665 fp = None # Set here since __del__ checks it
666
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000667 def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=False):
Fred Drake484d7352000-10-02 21:14:52 +0000668 """Open the ZIP file with mode read "r", write "w" or append "a"."""
Georg Brandl4b3ab6f2007-07-12 09:59:22 +0000669 if mode not in ("r", "w", "a"):
670 raise RuntimeError('ZipFile() requires mode "r", "w", or "a"')
671
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000672 if compression == ZIP_STORED:
673 pass
674 elif compression == ZIP_DEFLATED:
675 if not zlib:
676 raise RuntimeError,\
Fred Drake5db246d2000-09-29 20:44:48 +0000677 "Compression requires the (missing) zlib module"
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000678 else:
679 raise RuntimeError, "That compression method is not supported"
Georg Brandl4b3ab6f2007-07-12 09:59:22 +0000680
681 self._allowZip64 = allowZip64
682 self._didModify = False
Tim Peterse1190062001-01-15 03:34:38 +0000683 self.debug = 0 # Level of printing: 0 through 3
684 self.NameToInfo = {} # Find file info given name
685 self.filelist = [] # List of ZipInfo instances for archive
686 self.compression = compression # Method of compression
Raymond Hettinger2ca7c192005-02-16 09:27:49 +0000687 self.mode = key = mode.replace('b', '')[0]
Martin v. Löwisc6d626e2007-02-13 09:49:38 +0000688 self.pwd = None
Martin v. Löwis8c436412008-07-03 12:51:14 +0000689 self.comment = ''
Tim Petersa19a1682001-03-29 04:36:09 +0000690
Fred Drake3d9091e2001-03-26 15:49:24 +0000691 # Check if we were passed a file-like object
Walter Dörwald65230a22002-06-03 15:58:32 +0000692 if isinstance(file, basestring):
Fred Drake3d9091e2001-03-26 15:49:24 +0000693 self._filePassed = 0
694 self.filename = file
695 modeDict = {'r' : 'rb', 'w': 'wb', 'a' : 'r+b'}
Martin v. Löwis84f6de92007-02-13 10:10:39 +0000696 try:
697 self.fp = open(file, modeDict[mode])
698 except IOError:
699 if mode == 'a':
700 mode = key = 'w'
701 self.fp = open(file, modeDict[mode])
702 else:
703 raise
Fred Drake3d9091e2001-03-26 15:49:24 +0000704 else:
705 self._filePassed = 1
706 self.fp = file
707 self.filename = getattr(file, 'name', None)
Tim Petersa19a1682001-03-29 04:36:09 +0000708
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000709 if key == 'r':
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000710 self._GetContents()
711 elif key == 'w':
Georg Brandl86e0c892010-11-26 07:22:28 +0000712 # set the modified flag so central directory gets written
713 # even if no files are added to the archive
714 self._didModify = True
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000715 elif key == 'a':
Georg Brandl86e0c892010-11-26 07:22:28 +0000716 try:
717 # See if file is a zip file
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000718 self._RealGetContents()
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000719 # seek to start of directory and overwrite
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000720 self.fp.seek(self.start_dir, 0)
Georg Brandl86e0c892010-11-26 07:22:28 +0000721 except BadZipfile:
722 # file is not a zip file, just append
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000723 self.fp.seek(0, 2)
Georg Brandl86e0c892010-11-26 07:22:28 +0000724
725 # set the modified flag so central directory gets written
726 # even if no files are added to the archive
727 self._didModify = True
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000728 else:
Tim Peters7d3bad62001-04-04 18:56:49 +0000729 if not self._filePassed:
730 self.fp.close()
731 self.fp = None
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000732 raise RuntimeError, 'Mode must be "r", "w" or "a"'
733
Ezio Melotti569e61f2009-12-30 06:14:51 +0000734 def __enter__(self):
735 return self
736
737 def __exit__(self, type, value, traceback):
738 self.close()
739
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000740 def _GetContents(self):
Tim Peters7d3bad62001-04-04 18:56:49 +0000741 """Read the directory, making sure we close the file if the format
742 is bad."""
743 try:
744 self._RealGetContents()
745 except BadZipfile:
746 if not self._filePassed:
747 self.fp.close()
748 self.fp = None
749 raise
750
751 def _RealGetContents(self):
Fred Drake484d7352000-10-02 21:14:52 +0000752 """Read in the table of contents for the ZIP file."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000753 fp = self.fp
Georg Brandl86e0c892010-11-26 07:22:28 +0000754 try:
755 endrec = _EndRecData(fp)
756 except IOError:
757 raise BadZipfile("File is not a zip file")
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000758 if not endrec:
759 raise BadZipfile, "File is not a zip file"
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000760 if self.debug > 1:
761 print endrec
Martin v. Löwis8c436412008-07-03 12:51:14 +0000762 size_cd = endrec[_ECD_SIZE] # bytes in central directory
763 offset_cd = endrec[_ECD_OFFSET] # offset of central directory
764 self.comment = endrec[_ECD_COMMENT] # archive comment
765
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000766 # "concat" is zero, unless zip was concatenated to another file
Martin v. Löwis8c436412008-07-03 12:51:14 +0000767 concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
Antoine Pitrouebcd0ce2008-09-05 23:30:23 +0000768 if endrec[_ECD_SIGNATURE] == stringEndArchive64:
769 # If Zip64 extension structures are present, account for them
Martin v. Löwis8c436412008-07-03 12:51:14 +0000770 concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)
771
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000772 if self.debug > 2:
Martin v. Löwis8c436412008-07-03 12:51:14 +0000773 inferred = concat + offset_cd
774 print "given, inferred, offset", offset_cd, inferred, concat
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000775 # self.start_dir: Position of start of central directory
776 self.start_dir = offset_cd + concat
777 fp.seek(self.start_dir, 0)
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000778 data = fp.read(size_cd)
779 fp = cStringIO.StringIO(data)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000780 total = 0
781 while total < size_cd:
Martin v. Löwis8c436412008-07-03 12:51:14 +0000782 centdir = fp.read(sizeCentralDir)
Amaury Forgeot d'Arcae6d2b92008-07-11 21:28:25 +0000783 if centdir[0:4] != stringCentralDir:
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000784 raise BadZipfile, "Bad magic number for central directory"
785 centdir = struct.unpack(structCentralDir, centdir)
786 if self.debug > 2:
787 print centdir
Fred Drake3e038e52001-02-28 17:56:26 +0000788 filename = fp.read(centdir[_CD_FILENAME_LENGTH])
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000789 # Create ZipInfo instance to store file information
790 x = ZipInfo(filename)
Fred Drake3e038e52001-02-28 17:56:26 +0000791 x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
792 x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000793 x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000794 (x.create_version, x.create_system, x.extract_version, x.reserved,
795 x.flag_bits, x.compress_type, t, d,
796 x.CRC, x.compress_size, x.file_size) = centdir[1:12]
797 x.volume, x.internal_attr, x.external_attr = centdir[15:18]
798 # Convert date/time code to (year, month, day, hour, min, sec)
Gregory P. Smith0c63fc22008-01-20 01:21:03 +0000799 x._raw_time = t
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000800 x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
Fred Drake414ca662000-06-13 18:49:53 +0000801 t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000802
803 x._decodeExtra()
804 x.header_offset = x.header_offset + concat
Martin v. Löwis471617d2008-05-05 17:16:58 +0000805 x.filename = x._decodeFilename()
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000806 self.filelist.append(x)
807 self.NameToInfo[x.filename] = x
Martin v. Löwis8c436412008-07-03 12:51:14 +0000808
809 # update total bytes read from central directory
810 total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH]
811 + centdir[_CD_EXTRA_FIELD_LENGTH]
812 + centdir[_CD_COMMENT_LENGTH])
813
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000814 if self.debug > 2:
815 print "total", total
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000816
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000817
818 def namelist(self):
Fred Drake484d7352000-10-02 21:14:52 +0000819 """Return a list of file names in the archive."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000820 l = []
821 for data in self.filelist:
822 l.append(data.filename)
823 return l
824
825 def infolist(self):
Fred Drake484d7352000-10-02 21:14:52 +0000826 """Return a list of class ZipInfo instances for files in the
827 archive."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000828 return self.filelist
829
830 def printdir(self):
Fred Drake484d7352000-10-02 21:14:52 +0000831 """Print a table of contents for the zip file."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000832 print "%-46s %19s %12s" % ("File Name", "Modified ", "Size")
833 for zinfo in self.filelist:
Raymond Hettinger351e1a32008-01-14 22:58:05 +0000834 date = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time[:6]
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000835 print "%-46s %s %12d" % (zinfo.filename, date, zinfo.file_size)
836
837 def testzip(self):
Fred Drake484d7352000-10-02 21:14:52 +0000838 """Read all the files and check the CRC."""
Antoine Pitrouc5342702008-08-17 13:06:29 +0000839 chunk_size = 2 ** 20
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000840 for zinfo in self.filelist:
841 try:
Antoine Pitrouc5342702008-08-17 13:06:29 +0000842 # Read by chunks, to avoid an OverflowError or a
843 # MemoryError with very large embedded files.
844 f = self.open(zinfo.filename, "r")
845 while f.read(chunk_size): # Check CRC-32
846 pass
Raymond Hettingerc0fac962003-06-27 22:25:03 +0000847 except BadZipfile:
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000848 return zinfo.filename
849
850 def getinfo(self, name):
Fred Drake484d7352000-10-02 21:14:52 +0000851 """Return the instance of ZipInfo given 'name'."""
Georg Brandl4b3ab6f2007-07-12 09:59:22 +0000852 info = self.NameToInfo.get(name)
853 if info is None:
854 raise KeyError(
855 'There is no item named %r in the archive' % name)
856
857 return info
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000858
Martin v. Löwisc6d626e2007-02-13 09:49:38 +0000859 def setpassword(self, pwd):
860 """Set default password for encrypted files."""
861 self.pwd = pwd
862
863 def read(self, name, pwd=None):
Fred Drake484d7352000-10-02 21:14:52 +0000864 """Return file bytes (as a string) for name."""
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000865 return self.open(name, "r", pwd).read()
866
867 def open(self, name, mode="r", pwd=None):
868 """Return file-like object for 'name'."""
869 if mode not in ("r", "U", "rU"):
870 raise RuntimeError, 'open() requires mode "r", "U", or "rU"'
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000871 if not self.fp:
872 raise RuntimeError, \
Fred Drake5db246d2000-09-29 20:44:48 +0000873 "Attempt to read ZIP archive that was already closed"
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000874
Tim Petersea5962f2007-03-12 18:07:52 +0000875 # Only open a new file for instances where we were not
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000876 # given a file object in the constructor
877 if self._filePassed:
878 zef_file = self.fp
879 else:
880 zef_file = open(self.filename, 'rb')
881
Georg Brandl112aa502008-05-20 08:25:48 +0000882 # Make sure we have an info object
883 if isinstance(name, ZipInfo):
884 # 'name' is already an info object
885 zinfo = name
886 else:
887 # Get info object for name
888 zinfo = self.getinfo(name)
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000889
890 zef_file.seek(zinfo.header_offset, 0)
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000891
892 # Skip the file header:
Martin v. Löwis8c436412008-07-03 12:51:14 +0000893 fheader = zef_file.read(sizeFileHeader)
Amaury Forgeot d'Arcae6d2b92008-07-11 21:28:25 +0000894 if fheader[0:4] != stringFileHeader:
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000895 raise BadZipfile, "Bad magic number for file header"
896
897 fheader = struct.unpack(structFileHeader, fheader)
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000898 fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000899 if fheader[_FH_EXTRA_FIELD_LENGTH]:
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000900 zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000901
902 if fname != zinfo.orig_filename:
903 raise BadZipfile, \
904 'File name in directory "%s" and header "%s" differ.' % (
905 zinfo.orig_filename, fname)
906
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000907 # check for encrypted flag & handle password
908 is_encrypted = zinfo.flag_bits & 0x1
909 zd = None
Martin v. Löwisc6d626e2007-02-13 09:49:38 +0000910 if is_encrypted:
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000911 if not pwd:
912 pwd = self.pwd
913 if not pwd:
914 raise RuntimeError, "File %s is encrypted, " \
915 "password required for extraction" % name
916
Martin v. Löwisc6d626e2007-02-13 09:49:38 +0000917 zd = _ZipDecrypter(pwd)
918 # The first 12 bytes in the cypher stream is an encryption header
919 # used to strengthen the algorithm. The first 11 bytes are
920 # completely random, while the 12th contains the MSB of the CRC,
Gregory P. Smith0c63fc22008-01-20 01:21:03 +0000921 # or the MSB of the file time depending on the header type
Martin v. Löwisc6d626e2007-02-13 09:49:38 +0000922 # and is used to check the correctness of the password.
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000923 bytes = zef_file.read(12)
Martin v. Löwisc6d626e2007-02-13 09:49:38 +0000924 h = map(zd, bytes[0:12])
Gregory P. Smith0c63fc22008-01-20 01:21:03 +0000925 if zinfo.flag_bits & 0x8:
926 # compare against the file type from extended local headers
927 check_byte = (zinfo._raw_time >> 8) & 0xff
928 else:
929 # compare against the CRC otherwise
930 check_byte = (zinfo.CRC >> 24) & 0xff
931 if ord(h[11]) != check_byte:
932 raise RuntimeError("Bad password for file", name)
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000933
Antoine Pitrou94c33eb2010-01-27 20:59:50 +0000934 return ZipExtFile(zef_file, mode, zinfo, zd)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000935
Georg Brandl62416bc2008-01-07 18:47:44 +0000936 def extract(self, member, path=None, pwd=None):
937 """Extract a member from the archive to the current working directory,
938 using its full name. Its file information is extracted as accurately
939 as possible. `member' may be a filename or a ZipInfo object. You can
940 specify a different directory using `path'.
941 """
942 if not isinstance(member, ZipInfo):
943 member = self.getinfo(member)
944
945 if path is None:
946 path = os.getcwd()
947
948 return self._extract_member(member, path, pwd)
949
950 def extractall(self, path=None, members=None, pwd=None):
951 """Extract all members from the archive to the current working
952 directory. `path' specifies a different directory to extract to.
953 `members' is optional and must be a subset of the list returned
954 by namelist().
955 """
956 if members is None:
957 members = self.namelist()
958
959 for zipinfo in members:
960 self.extract(zipinfo, path, pwd)
961
962 def _extract_member(self, member, targetpath, pwd):
963 """Extract the ZipInfo object 'member' to a physical
964 file on the path targetpath.
965 """
966 # build the destination pathname, replacing
967 # forward slashes to platform specific separators.
Antoine Pitrou97377bf2009-05-04 21:17:17 +0000968 # Strip trailing path separator, unless it represents the root.
969 if (targetpath[-1:] in (os.path.sep, os.path.altsep)
970 and len(os.path.splitdrive(targetpath)[1]) > 1):
Georg Brandl62416bc2008-01-07 18:47:44 +0000971 targetpath = targetpath[:-1]
972
973 # don't include leading "/" from file name if present
Martin v. Löwis0dfcfc82009-01-24 14:00:33 +0000974 if member.filename[0] == '/':
Georg Brandl62416bc2008-01-07 18:47:44 +0000975 targetpath = os.path.join(targetpath, member.filename[1:])
976 else:
977 targetpath = os.path.join(targetpath, member.filename)
978
979 targetpath = os.path.normpath(targetpath)
980
981 # Create all upper directories if necessary.
982 upperdirs = os.path.dirname(targetpath)
983 if upperdirs and not os.path.exists(upperdirs):
984 os.makedirs(upperdirs)
985
Martin v. Löwis0dfcfc82009-01-24 14:00:33 +0000986 if member.filename[-1] == '/':
Martin v. Löwis0b09c422009-05-24 19:30:52 +0000987 if not os.path.isdir(targetpath):
988 os.mkdir(targetpath)
Martin v. Löwis0dfcfc82009-01-24 14:00:33 +0000989 return targetpath
990
Georg Brandl112aa502008-05-20 08:25:48 +0000991 source = self.open(member, pwd=pwd)
Georg Brandl62416bc2008-01-07 18:47:44 +0000992 target = file(targetpath, "wb")
993 shutil.copyfileobj(source, target)
994 source.close()
995 target.close()
996
997 return targetpath
998
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000999 def _writecheck(self, zinfo):
Fred Drake484d7352000-10-02 21:14:52 +00001000 """Check for errors before writing a file to the archive."""
Raymond Hettinger54f02222002-06-01 14:18:47 +00001001 if zinfo.filename in self.NameToInfo:
Tim Peterse1190062001-01-15 03:34:38 +00001002 if self.debug: # Warning for duplicate names
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001003 print "Duplicate name:", zinfo.filename
1004 if self.mode not in ("w", "a"):
1005 raise RuntimeError, 'write() requires mode "w" or "a"'
1006 if not self.fp:
1007 raise RuntimeError, \
Fred Drake5db246d2000-09-29 20:44:48 +00001008 "Attempt to write ZIP archive that was already closed"
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001009 if zinfo.compress_type == ZIP_DEFLATED and not zlib:
1010 raise RuntimeError, \
Fred Drake5db246d2000-09-29 20:44:48 +00001011 "Compression requires the (missing) zlib module"
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001012 if zinfo.compress_type not in (ZIP_STORED, ZIP_DEFLATED):
1013 raise RuntimeError, \
Fred Drake5db246d2000-09-29 20:44:48 +00001014 "That compression method is not supported"
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001015 if zinfo.file_size > ZIP64_LIMIT:
1016 if not self._allowZip64:
1017 raise LargeZipFile("Filesize would require ZIP64 extensions")
1018 if zinfo.header_offset > ZIP64_LIMIT:
1019 if not self._allowZip64:
1020 raise LargeZipFile("Zipfile size would require ZIP64 extensions")
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001021
1022 def write(self, filename, arcname=None, compress_type=None):
Fred Drake484d7352000-10-02 21:14:52 +00001023 """Put the bytes from filename into the archive under the name
1024 arcname."""
Georg Brandl4b3ab6f2007-07-12 09:59:22 +00001025 if not self.fp:
1026 raise RuntimeError(
1027 "Attempt to write to ZIP archive that was already closed")
1028
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001029 st = os.stat(filename)
Martin v. Löwis0dfcfc82009-01-24 14:00:33 +00001030 isdir = stat.S_ISDIR(st.st_mode)
Raymond Hettinger32200ae2002-06-01 19:51:15 +00001031 mtime = time.localtime(st.st_mtime)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001032 date_time = mtime[0:6]
1033 # Create ZipInfo instance to store file information
1034 if arcname is None:
Georg Brandl8f7c54e2006-02-20 08:40:38 +00001035 arcname = filename
1036 arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
1037 while arcname[0] in (os.sep, os.altsep):
1038 arcname = arcname[1:]
Martin v. Löwis0dfcfc82009-01-24 14:00:33 +00001039 if isdir:
1040 arcname += '/'
Georg Brandl8f7c54e2006-02-20 08:40:38 +00001041 zinfo = ZipInfo(arcname, date_time)
Andrew M. Kuchling55430212004-07-10 15:40:29 +00001042 zinfo.external_attr = (st[0] & 0xFFFF) << 16L # Unix attributes
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001043 if compress_type is None:
Tim Peterse1190062001-01-15 03:34:38 +00001044 zinfo.compress_type = self.compression
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001045 else:
Tim Peterse1190062001-01-15 03:34:38 +00001046 zinfo.compress_type = compress_type
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001047
1048 zinfo.file_size = st.st_size
Finn Bock03a3bb82001-09-05 18:40:33 +00001049 zinfo.flag_bits = 0x00
Tim Peterse1190062001-01-15 03:34:38 +00001050 zinfo.header_offset = self.fp.tell() # Start of header bytes
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001051
1052 self._writecheck(zinfo)
1053 self._didModify = True
Martin v. Löwis0dfcfc82009-01-24 14:00:33 +00001054
1055 if isdir:
1056 zinfo.file_size = 0
1057 zinfo.compress_size = 0
1058 zinfo.CRC = 0
1059 self.filelist.append(zinfo)
1060 self.NameToInfo[zinfo.filename] = zinfo
1061 self.fp.write(zinfo.FileHeader())
1062 return
1063
Benjamin Petersonb91e8ed2009-05-10 02:29:00 +00001064 with open(filename, "rb") as fp:
1065 # Must overwrite CRC and sizes with correct data later
1066 zinfo.CRC = CRC = 0
1067 zinfo.compress_size = compress_size = 0
1068 zinfo.file_size = file_size = 0
1069 self.fp.write(zinfo.FileHeader())
1070 if zinfo.compress_type == ZIP_DEFLATED:
1071 cmpr = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
1072 zlib.DEFLATED, -15)
1073 else:
1074 cmpr = None
1075 while 1:
1076 buf = fp.read(1024 * 8)
1077 if not buf:
1078 break
1079 file_size = file_size + len(buf)
1080 CRC = crc32(buf, CRC) & 0xffffffff
1081 if cmpr:
1082 buf = cmpr.compress(buf)
1083 compress_size = compress_size + len(buf)
1084 self.fp.write(buf)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001085 if cmpr:
1086 buf = cmpr.flush()
1087 compress_size = compress_size + len(buf)
1088 self.fp.write(buf)
1089 zinfo.compress_size = compress_size
1090 else:
1091 zinfo.compress_size = file_size
1092 zinfo.CRC = CRC
1093 zinfo.file_size = file_size
Finn Bock03a3bb82001-09-05 18:40:33 +00001094 # Seek backwards and write CRC and file sizes
Tim Petersb64bec32001-09-18 02:26:39 +00001095 position = self.fp.tell() # Preserve current position in file
Finn Bock03a3bb82001-09-05 18:40:33 +00001096 self.fp.seek(zinfo.header_offset + 14, 0)
Gregory P. Smithbf02e3b2008-03-19 03:14:41 +00001097 self.fp.write(struct.pack("<LLL", zinfo.CRC, zinfo.compress_size,
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001098 zinfo.file_size))
Finn Bock03a3bb82001-09-05 18:40:33 +00001099 self.fp.seek(position, 0)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001100 self.filelist.append(zinfo)
1101 self.NameToInfo[zinfo.filename] = zinfo
1102
Ronald Oussorendd25e862010-02-07 20:18:02 +00001103 def writestr(self, zinfo_or_arcname, bytes, compress_type=None):
Fred Drake484d7352000-10-02 21:14:52 +00001104 """Write a file into the archive. The contents is the string
Just van Rossumb083cb32002-12-12 12:23:32 +00001105 'bytes'. 'zinfo_or_arcname' is either a ZipInfo instance or
1106 the name of the file in the archive."""
1107 if not isinstance(zinfo_or_arcname, ZipInfo):
1108 zinfo = ZipInfo(filename=zinfo_or_arcname,
Raymond Hettinger351e1a32008-01-14 22:58:05 +00001109 date_time=time.localtime(time.time())[:6])
Ronald Oussorendd25e862010-02-07 20:18:02 +00001110
Just van Rossumb083cb32002-12-12 12:23:32 +00001111 zinfo.compress_type = self.compression
Antoine Pitrou5fdfa3e2008-07-25 19:42:26 +00001112 zinfo.external_attr = 0600 << 16
Just van Rossumb083cb32002-12-12 12:23:32 +00001113 else:
1114 zinfo = zinfo_or_arcname
Georg Brandl4b3ab6f2007-07-12 09:59:22 +00001115
1116 if not self.fp:
1117 raise RuntimeError(
1118 "Attempt to write to ZIP archive that was already closed")
1119
Ronald Oussorendd25e862010-02-07 20:18:02 +00001120 if compress_type is not None:
1121 zinfo.compress_type = compress_type
1122
Tim Peterse1190062001-01-15 03:34:38 +00001123 zinfo.file_size = len(bytes) # Uncompressed size
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001124 zinfo.header_offset = self.fp.tell() # Start of header bytes
1125 self._writecheck(zinfo)
1126 self._didModify = True
Gregory P. Smithbf02e3b2008-03-19 03:14:41 +00001127 zinfo.CRC = crc32(bytes) & 0xffffffff # CRC-32 checksum
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001128 if zinfo.compress_type == ZIP_DEFLATED:
1129 co = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
1130 zlib.DEFLATED, -15)
1131 bytes = co.compress(bytes) + co.flush()
Tim Peterse1190062001-01-15 03:34:38 +00001132 zinfo.compress_size = len(bytes) # Compressed size
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001133 else:
1134 zinfo.compress_size = zinfo.file_size
Tim Peterse1190062001-01-15 03:34:38 +00001135 zinfo.header_offset = self.fp.tell() # Start of header bytes
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001136 self.fp.write(zinfo.FileHeader())
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001137 self.fp.write(bytes)
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001138 self.fp.flush()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001139 if zinfo.flag_bits & 0x08:
Tim Peterse1190062001-01-15 03:34:38 +00001140 # Write CRC and file sizes after the file data
Gregory P. Smith26627332009-06-26 07:50:21 +00001141 self.fp.write(struct.pack("<LLL", zinfo.CRC, zinfo.compress_size,
Tim Peterse1190062001-01-15 03:34:38 +00001142 zinfo.file_size))
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001143 self.filelist.append(zinfo)
1144 self.NameToInfo[zinfo.filename] = zinfo
1145
1146 def __del__(self):
Fred Drake484d7352000-10-02 21:14:52 +00001147 """Call the "close()" method in case the user forgot."""
Tim Petersd15f8bb2001-11-28 23:16:40 +00001148 self.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001149
1150 def close(self):
Fred Drake484d7352000-10-02 21:14:52 +00001151 """Close the file, and for mode "w" and "a" write the ending
1152 records."""
Tim Petersd15f8bb2001-11-28 23:16:40 +00001153 if self.fp is None:
1154 return
Tim Petersa608bb22006-06-15 18:06:29 +00001155
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001156 if self.mode in ("w", "a") and self._didModify: # write ending records
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001157 count = 0
1158 pos1 = self.fp.tell()
Tim Peterse1190062001-01-15 03:34:38 +00001159 for zinfo in self.filelist: # write central directory
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001160 count = count + 1
1161 dt = zinfo.date_time
1162 dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
Tim Peters3caca232001-12-06 06:23:26 +00001163 dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001164 extra = []
1165 if zinfo.file_size > ZIP64_LIMIT \
1166 or zinfo.compress_size > ZIP64_LIMIT:
1167 extra.append(zinfo.file_size)
1168 extra.append(zinfo.compress_size)
Martin v. Löwis8c436412008-07-03 12:51:14 +00001169 file_size = 0xffffffff
1170 compress_size = 0xffffffff
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001171 else:
1172 file_size = zinfo.file_size
1173 compress_size = zinfo.compress_size
1174
1175 if zinfo.header_offset > ZIP64_LIMIT:
1176 extra.append(zinfo.header_offset)
Martin v. Löwis8c436412008-07-03 12:51:14 +00001177 header_offset = 0xffffffffL
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001178 else:
1179 header_offset = zinfo.header_offset
1180
1181 extra_data = zinfo.extra
1182 if extra:
1183 # Append a ZIP64 field to the extra's
1184 extra_data = struct.pack(
Gregory P. Smithb89a0962008-03-19 01:46:10 +00001185 '<HH' + 'Q'*len(extra),
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001186 1, 8*len(extra), *extra) + extra_data
Tim Petersa608bb22006-06-15 18:06:29 +00001187
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001188 extract_version = max(45, zinfo.extract_version)
1189 create_version = max(45, zinfo.create_version)
1190 else:
1191 extract_version = zinfo.extract_version
1192 create_version = zinfo.create_version
1193
Gregory P. Smithbf02e3b2008-03-19 03:14:41 +00001194 try:
Martin v. Löwis471617d2008-05-05 17:16:58 +00001195 filename, flag_bits = zinfo._encodeFilenameFlags()
Gregory P. Smithbf02e3b2008-03-19 03:14:41 +00001196 centdir = struct.pack(structCentralDir,
Amaury Forgeot d'Arcae6d2b92008-07-11 21:28:25 +00001197 stringCentralDir, create_version,
Gregory P. Smithbf02e3b2008-03-19 03:14:41 +00001198 zinfo.create_system, extract_version, zinfo.reserved,
Martin v. Löwis471617d2008-05-05 17:16:58 +00001199 flag_bits, zinfo.compress_type, dostime, dosdate,
Gregory P. Smithbf02e3b2008-03-19 03:14:41 +00001200 zinfo.CRC, compress_size, file_size,
Martin v. Löwis471617d2008-05-05 17:16:58 +00001201 len(filename), len(extra_data), len(zinfo.comment),
Gregory P. Smithbf02e3b2008-03-19 03:14:41 +00001202 0, zinfo.internal_attr, zinfo.external_attr,
1203 header_offset)
1204 except DeprecationWarning:
1205 print >>sys.stderr, (structCentralDir,
1206 stringCentralDir, create_version,
1207 zinfo.create_system, extract_version, zinfo.reserved,
1208 zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
1209 zinfo.CRC, compress_size, file_size,
1210 len(zinfo.filename), len(extra_data), len(zinfo.comment),
1211 0, zinfo.internal_attr, zinfo.external_attr,
1212 header_offset)
1213 raise
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001214 self.fp.write(centdir)
Martin v. Löwis471617d2008-05-05 17:16:58 +00001215 self.fp.write(filename)
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001216 self.fp.write(extra_data)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001217 self.fp.write(zinfo.comment)
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001218
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001219 pos2 = self.fp.tell()
1220 # Write end-of-zip-archive record
Amaury Forgeot d'Arcd25f87a2009-01-17 16:40:17 +00001221 centDirCount = count
1222 centDirSize = pos2 - pos1
Martin v. Löwis8c436412008-07-03 12:51:14 +00001223 centDirOffset = pos1
Amaury Forgeot d'Arcd25f87a2009-01-17 16:40:17 +00001224 if (centDirCount >= ZIP_FILECOUNT_LIMIT or
1225 centDirOffset > ZIP64_LIMIT or
1226 centDirSize > ZIP64_LIMIT):
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001227 # Need to write the ZIP64 end-of-archive records
1228 zip64endrec = struct.pack(
Amaury Forgeot d'Arcae6d2b92008-07-11 21:28:25 +00001229 structEndArchive64, stringEndArchive64,
Amaury Forgeot d'Arcd25f87a2009-01-17 16:40:17 +00001230 44, 45, 45, 0, 0, centDirCount, centDirCount,
1231 centDirSize, centDirOffset)
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001232 self.fp.write(zip64endrec)
1233
1234 zip64locrec = struct.pack(
Amaury Forgeot d'Arcae6d2b92008-07-11 21:28:25 +00001235 structEndArchive64Locator,
1236 stringEndArchive64Locator, 0, pos2, 1)
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001237 self.fp.write(zip64locrec)
Amaury Forgeot d'Arcd25f87a2009-01-17 16:40:17 +00001238 centDirCount = min(centDirCount, 0xFFFF)
1239 centDirSize = min(centDirSize, 0xFFFFFFFF)
1240 centDirOffset = min(centDirOffset, 0xFFFFFFFF)
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001241
Martin v. Löwis8c436412008-07-03 12:51:14 +00001242 # check for valid comment length
1243 if len(self.comment) >= ZIP_MAX_COMMENT:
1244 if self.debug > 0:
1245 msg = 'Archive comment is too long; truncating to %d bytes' \
1246 % ZIP_MAX_COMMENT
1247 self.comment = self.comment[:ZIP_MAX_COMMENT]
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001248
Amaury Forgeot d'Arcae6d2b92008-07-11 21:28:25 +00001249 endrec = struct.pack(structEndArchive, stringEndArchive,
Amaury Forgeot d'Arcd25f87a2009-01-17 16:40:17 +00001250 0, 0, centDirCount, centDirCount,
1251 centDirSize, centDirOffset, len(self.comment))
Martin v. Löwis8c436412008-07-03 12:51:14 +00001252 self.fp.write(endrec)
1253 self.fp.write(self.comment)
Guido van Rossumf85af612001-04-14 16:45:14 +00001254 self.fp.flush()
Martin v. Löwis8c436412008-07-03 12:51:14 +00001255
Fred Drake3d9091e2001-03-26 15:49:24 +00001256 if not self._filePassed:
1257 self.fp.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001258 self.fp = None
1259
1260
class PyZipFile(ZipFile):
    """Class to create ZIP archives with Python library files and packages."""

    def writepy(self, pathname, basename=""):
        """Add all files from "pathname" to the ZIP archive.

        If pathname is a package directory, search the directory and
        all package subdirectories recursively for all *.py and enter
        the modules into the archive.  If pathname is a plain
        directory, listdir *.py and enter all modules.  Else, pathname
        must be a Python *.py file and the module will be put into the
        archive.  Added modules are normally module.pyo or module.pyc;
        this method will compile the module.py into module.pyc if
        necessary.  If that compilation fails, the module.py source
        itself is added instead.

        "basename" is the archive directory the modules are stored
        under; it is extended with the package name while descending
        into package directories.

        Raises RuntimeError if pathname is neither a directory nor a
        path ending in ".py".
        """
        name = os.path.split(pathname)[1]
        if os.path.isdir(pathname):
            initname = os.path.join(pathname, "__init__.py")
            if os.path.isfile(initname):
                # This is a package directory, add it
                if basename:
                    basename = "%s/%s" % (basename, name)
                else:
                    basename = name
                if self.debug:
                    print("Adding package in %s as %s" % (pathname, basename))
                fname, arcname = self._get_codename(initname[:-3], basename)
                if self.debug:
                    print("Adding %s" % arcname)
                self.write(fname, arcname)
                dirlist = os.listdir(pathname)
                # __init__.py was handled above; isfile() guarantees it
                # is present in the listing.
                dirlist.remove("__init__.py")
                # Add all *.py files and package subdirectories
                for filename in dirlist:
                    path = os.path.join(pathname, filename)
                    ext = os.path.splitext(filename)[1]
                    if os.path.isdir(path):
                        if os.path.isfile(os.path.join(path, "__init__.py")):
                            # This is a package directory, add it
                            self.writepy(path, basename)  # Recursive call
                    elif ext == ".py":
                        fname, arcname = self._get_codename(path[:-3],
                                                            basename)
                        if self.debug:
                            print("Adding %s" % arcname)
                        self.write(fname, arcname)
            else:
                # This is NOT a package directory, add its files at top level
                if self.debug:
                    print("Adding files from directory %s" % pathname)
                for filename in os.listdir(pathname):
                    path = os.path.join(pathname, filename)
                    ext = os.path.splitext(filename)[1]
                    if ext == ".py":
                        fname, arcname = self._get_codename(path[:-3],
                                                            basename)
                        if self.debug:
                            print("Adding %s" % arcname)
                        self.write(fname, arcname)
        else:
            if pathname[-3:] != ".py":
                raise RuntimeError(
                    'Files added with writepy() must end with ".py"')
            fname, arcname = self._get_codename(pathname[:-3], basename)
            if self.debug:
                print("Adding file %s" % arcname)
            self.write(fname, arcname)

    def _get_codename(self, pathname, basename):
        """Return (filename, archivename) for the path.

        Given a module name path (without extension), return the file
        to store and its name inside the archive, compiling if
        necessary.  For example, given /python/lib/string, return
        (/python/lib/string.pyc, string.pyc); with a non-empty
        basename the archive name becomes "basename/string.pyc".

        An up-to-date .pyo file is preferred, then an up-to-date .pyc
        file; otherwise the .py file is compiled to .pyc.  If that
        compilation fails, the error message is printed and the .py
        source file is returned instead.
        """
        file_py = pathname + ".py"
        file_pyc = pathname + ".pyc"
        file_pyo = pathname + ".pyo"
        if os.path.isfile(file_pyo) and \
                os.stat(file_pyo).st_mtime >= os.stat(file_py).st_mtime:
            fname = file_pyo    # Use .pyo file
        elif not os.path.isfile(file_pyc) or \
                os.stat(file_pyc).st_mtime < os.stat(file_py).st_mtime:
            import py_compile
            if self.debug:
                print("Compiling %s" % file_py)
            try:
                py_compile.compile(file_py, file_pyc, None, True)
            except py_compile.PyCompileError as err:
                print(err.msg)
                # No usable byte code: the .pyc is missing or stale, so
                # store the source file rather than a bad/nonexistent .pyc.
                fname = file_py
            else:
                fname = file_pyc
        else:
            fname = file_pyc
        archivename = os.path.split(fname)[1]
        if basename:
            archivename = "%s/%s" % (basename, archivename)
        return (fname, archivename)
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001358
1359
def main(args=None):
    """Run the zipfile command-line interface.

    "args" is the list of command-line arguments without the program
    name; when it is None, sys.argv[1:] is used.  The first argument
    selects the action:

        -l zipfile.zip          print a listing of the archive
        -t zipfile.zip          test the archive, reporting a bad member
        -e zipfile.zip target   extract the archive into directory target
        -c zipfile.zip src ...  create the archive from files/directories

    If the action is unknown or the number of arguments does not fit
    the action, the usage text is printed and sys.exit(1) is called.
    """
    import textwrap
    USAGE = textwrap.dedent("""\
        Usage:
            zipfile.py -l zipfile.zip        # Show listing of a zipfile
            zipfile.py -t zipfile.zip        # Test if a zipfile is valid
            zipfile.py -e zipfile.zip target # Extract zipfile into target dir
            zipfile.py -c zipfile.zip src ... # Create zipfile from sources
        """)
    if args is None:
        args = sys.argv[1:]

    if not args or args[0] not in ('-l', '-c', '-e', '-t'):
        print(USAGE)
        sys.exit(1)

    if args[0] == '-l':
        if len(args) != 2:
            print(USAGE)
            sys.exit(1)
        zf = ZipFile(args[1], 'r')
        try:
            zf.printdir()
        finally:
            zf.close()

    elif args[0] == '-t':
        if len(args) != 2:
            print(USAGE)
            sys.exit(1)
        zf = ZipFile(args[1], 'r')
        try:
            badfile = zf.testzip()
        finally:
            # Always release the archive, even if testing raises.
            zf.close()
        if badfile:
            print("The following enclosed file is corrupted: {!r}".format(badfile))
        print("Done testing")

    elif args[0] == '-e':
        if len(args) != 3:
            print(USAGE)
            sys.exit(1)

        zf = ZipFile(args[1], 'r')
        try:
            out = args[2]
            for path in zf.namelist():
                # NOTE: member names are joined onto the target as-is, so
                # an archive with absolute names or ".." components can
                # write outside "out"; only extract trusted archives.
                if path.startswith('./'):
                    tgt = os.path.join(out, path[2:])
                else:
                    tgt = os.path.join(out, path)

                if path.endswith('/'):
                    # Directory entry: it has no data, so create the
                    # directory instead of trying to open it as a file.
                    if not os.path.isdir(tgt):
                        os.makedirs(tgt)
                    continue

                tgtdir = os.path.dirname(tgt)
                # tgtdir is empty when the file lands in the current
                # directory; os.makedirs('') would fail.
                if tgtdir and not os.path.exists(tgtdir):
                    os.makedirs(tgtdir)
                with open(tgt, 'wb') as fp:
                    fp.write(zf.read(path))
        finally:
            zf.close()

    elif args[0] == '-c':
        if len(args) < 3:
            print(USAGE)
            sys.exit(1)

        def addToZip(zf, path, zippath):
            # Store a file under "zippath", or recurse into a directory;
            # anything else (missing path, special file) is ignored.
            if os.path.isfile(path):
                zf.write(path, zippath, ZIP_DEFLATED)
            elif os.path.isdir(path):
                for nm in os.listdir(path):
                    addToZip(zf,
                             os.path.join(path, nm), os.path.join(zippath, nm))
            # else: ignore

        zf = ZipFile(args[1], 'w', allowZip64=True)
        try:
            for src in args[2:]:
                addToZip(zf, src, os.path.basename(src))
        finally:
            # close() runs even when adding a source fails, so the
            # file handle is not leaked.
            zf.close()
1433
# Run the command-line interface when this module is executed as a script.
if __name__ == "__main__":
    main()