blob: 7e3caf0aa840d38064080fe425573090f03e35aa [file] [log] [blame]
"""
Read and write ZIP files.
"""
Georg Brandl62416bc2008-01-07 18:47:44 +00004import struct, os, time, sys, shutil
Martin v. Löwis0dfcfc82009-01-24 14:00:33 +00005import binascii, cStringIO, stat
Antoine Pitrou94c33eb2010-01-27 20:59:50 +00006import io
7import re
Guido van Rossum32abe6f2000-03-31 17:30:02 +00008
# zlib is optional: it is only required for the deflate compression method.
# When it is missing, fall back to binascii for the CRC-32 routine.
try:
    import zlib
except ImportError:
    zlib = None
    crc32 = binascii.crc32
else:
    crc32 = zlib.crc32
Guido van Rossum32abe6f2000-03-31 17:30:02 +000015
# Public names exported by a star-import of this module.
__all__ = ["BadZipfile", "error", "ZIP_STORED", "ZIP_DEFLATED", "is_zipfile",
           "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile" ]
Skip Montanaro40fc1602001-03-01 04:27:19 +000018
class BadZipfile(Exception):
    """Raised by this module when an archive cannot be processed."""


class LargeZipFile(Exception):
    """
    Raised while writing a zipfile when the archive would need the ZIP64
    extensions but those extensions have not been enabled.
    """


# Alias for BadZipfile: the exception raised by this module.
error = BadZipfile
Guido van Rossum32abe6f2000-03-31 17:30:02 +000030
# Size/count thresholds used by this module.
ZIP64_LIMIT = 0x7FFFFFFF            # (1 << 31) - 1
ZIP_FILECOUNT_LIMIT = 0x10000       # 1 << 16
ZIP_MAX_COMMENT = 0xFFFF            # (1 << 16) - 1

# Constants for ZIP file compression methods.
# Other ZIP compression methods are not supported.
ZIP_STORED, ZIP_DEFLATED = 0, 8
39
Martin v. Löwis8c436412008-07-03 12:51:14 +000040# Below are some formats and associated data for reading/writing headers using
41# the struct module. The names and structures of headers/records are those used
42# in the PKWARE description of the ZIP file format:
43# http://www.pkware.com/documents/casestudies/APPNOTE.TXT
44# (URL valid as of January 2008)
Ronald Oussoren143cefb2006-06-15 08:14:18 +000045
Martin v. Löwis8c436412008-07-03 12:51:14 +000046# The "end of central directory" structure, magic number, size, and indices
47# (section V.I in the format document)
Amaury Forgeot d'Arcae6d2b92008-07-11 21:28:25 +000048structEndArchive = "<4s4H2LH"
49stringEndArchive = "PK\005\006"
50sizeEndCentDir = struct.calcsize(structEndArchive)
Martin v. Löwis8c436412008-07-03 12:51:14 +000051
52_ECD_SIGNATURE = 0
53_ECD_DISK_NUMBER = 1
54_ECD_DISK_START = 2
55_ECD_ENTRIES_THIS_DISK = 3
56_ECD_ENTRIES_TOTAL = 4
57_ECD_SIZE = 5
58_ECD_OFFSET = 6
59_ECD_COMMENT_SIZE = 7
60# These last two indices are not part of the structure as defined in the
61# spec, but they are used internally by this module as a convenience
62_ECD_COMMENT = 8
63_ECD_LOCATION = 9
64
65# The "central directory" structure, magic number, size, and indices
66# of entries in the structure (section V.F in the format document)
67structCentralDir = "<4s4B4HL2L5H2L"
Amaury Forgeot d'Arcae6d2b92008-07-11 21:28:25 +000068stringCentralDir = "PK\001\002"
Martin v. Löwis8c436412008-07-03 12:51:14 +000069sizeCentralDir = struct.calcsize(structCentralDir)
70
Fred Drake3e038e52001-02-28 17:56:26 +000071# indexes of entries in the central directory structure
72_CD_SIGNATURE = 0
73_CD_CREATE_VERSION = 1
74_CD_CREATE_SYSTEM = 2
75_CD_EXTRACT_VERSION = 3
Martin v. Löwis8c436412008-07-03 12:51:14 +000076_CD_EXTRACT_SYSTEM = 4
Fred Drake3e038e52001-02-28 17:56:26 +000077_CD_FLAG_BITS = 5
78_CD_COMPRESS_TYPE = 6
79_CD_TIME = 7
80_CD_DATE = 8
81_CD_CRC = 9
82_CD_COMPRESSED_SIZE = 10
83_CD_UNCOMPRESSED_SIZE = 11
84_CD_FILENAME_LENGTH = 12
85_CD_EXTRA_FIELD_LENGTH = 13
86_CD_COMMENT_LENGTH = 14
87_CD_DISK_NUMBER_START = 15
88_CD_INTERNAL_FILE_ATTRIBUTES = 16
89_CD_EXTERNAL_FILE_ATTRIBUTES = 17
90_CD_LOCAL_HEADER_OFFSET = 18
91
Martin v. Löwis8c436412008-07-03 12:51:14 +000092# The "local file header" structure, magic number, size, and indices
93# (section V.A in the format document)
94structFileHeader = "<4s2B4HL2L2H"
Amaury Forgeot d'Arcae6d2b92008-07-11 21:28:25 +000095stringFileHeader = "PK\003\004"
Martin v. Löwis8c436412008-07-03 12:51:14 +000096sizeFileHeader = struct.calcsize(structFileHeader)
97
Fred Drake3e038e52001-02-28 17:56:26 +000098_FH_SIGNATURE = 0
99_FH_EXTRACT_VERSION = 1
Martin v. Löwis8c436412008-07-03 12:51:14 +0000100_FH_EXTRACT_SYSTEM = 2
Fred Drake3e038e52001-02-28 17:56:26 +0000101_FH_GENERAL_PURPOSE_FLAG_BITS = 3
102_FH_COMPRESSION_METHOD = 4
103_FH_LAST_MOD_TIME = 5
104_FH_LAST_MOD_DATE = 6
105_FH_CRC = 7
106_FH_COMPRESSED_SIZE = 8
107_FH_UNCOMPRESSED_SIZE = 9
108_FH_FILENAME_LENGTH = 10
109_FH_EXTRA_FIELD_LENGTH = 11
110
# The "Zip64 end of central directory locator" structure, magic number, and size
stringEndArchive64Locator = "PK\x06\x07"
structEndArchive64Locator = "<4sLQL"
sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator)
Martin v. Löwis8c436412008-07-03 12:51:14 +0000115
116# The "Zip64 end of central directory" record, magic number, size, and indices
117# (section V.G in the format document)
Amaury Forgeot d'Arcae6d2b92008-07-11 21:28:25 +0000118structEndArchive64 = "<4sQ2H2L4Q"
119stringEndArchive64 = "PK\x06\x06"
120sizeEndCentDir64 = struct.calcsize(structEndArchive64)
Martin v. Löwis8c436412008-07-03 12:51:14 +0000121
122_CD64_SIGNATURE = 0
123_CD64_DIRECTORY_RECSIZE = 1
124_CD64_CREATE_VERSION = 2
125_CD64_EXTRACT_VERSION = 3
126_CD64_DISK_NUMBER = 4
127_CD64_DISK_NUMBER_START = 5
128_CD64_NUMBER_ENTRIES_THIS_DISK = 6
129_CD64_NUMBER_ENTRIES_TOTAL = 7
130_CD64_DIRECTORY_SIZE = 8
131_CD64_OFFSET_START_CENTDIR = 9
132
def _check_zipfile(fp):
    """Return True if an end-of-central-directory record is found in *fp*.

    An IOError raised while probing the file is treated as "not a ZIP file".
    """
    try:
        # A truthy record means the file has the correct magic number.
        found = bool(_EndRecData(fp))
    except IOError:
        found = False
    return found
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000140
def is_zipfile(filename):
    """Quickly see if a file is a ZIP file by checking the magic number.

    The filename argument may be a file or file-like object too; anything
    with a "read" attribute is probed directly, anything else is opened as
    a path in binary mode.  An IOError at any point yields False.
    """
    verdict = False
    try:
        if not hasattr(filename, "read"):
            with open(filename, "rb") as handle:
                verdict = _check_zipfile(handle)
        else:
            verdict = _check_zipfile(fp=filename)
    except IOError:
        pass
    return verdict
156
def _EndRecData64(fpin, offset, endrec):
    """
    Read the ZIP64 end-of-archive records and use that to update endrec

    fpin   -- seekable file object positioned anywhere.
    offset -- position of the classic end-of-central-directory record,
              expressed relative to the end of the file (so it is <= 0).
    endrec -- list built by _EndRecData(); updated in place when a valid
              ZIP64 record is found.

    Returns endrec (modified or not).  Raises BadZipfile when the ZIP64
    locator describes a multi-disk archive.
    """
    try:
        fpin.seek(offset - sizeEndCentDir64Locator, 2)
    except IOError:
        # If the seek fails, the file is not large enough to contain a ZIP64
        # end-of-archive record, so just return the end record we were given.
        return endrec

    data = fpin.read(sizeEndCentDir64Locator)
    if len(data) != sizeEndCentDir64Locator:
        # Short read: there is no room for a locator, and struct.unpack
        # would raise on a buffer of the wrong size.
        return endrec
    sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, data)
    if sig != stringEndArchive64Locator:
        return endrec

    if diskno != 0 or disks != 1:
        raise BadZipfile("zipfiles that span multiple disks are not supported")

    # Assume no 'zip64 extensible data'
    fpin.seek(offset - sizeEndCentDir64Locator - sizeEndCentDir64, 2)
    data = fpin.read(sizeEndCentDir64)
    if len(data) != sizeEndCentDir64:
        # Truncated ZIP64 record: keep the values from the classic record.
        return endrec
    sig, sz, create_version, read_version, disk_num, disk_dir, \
            dircount, dircount2, dirsize, diroffset = \
            struct.unpack(structEndArchive64, data)
    if sig != stringEndArchive64:
        return endrec

    # Update the original endrec using data from the ZIP64 record
    endrec[_ECD_SIGNATURE] = sig
    endrec[_ECD_DISK_NUMBER] = disk_num
    endrec[_ECD_DISK_START] = disk_dir
    endrec[_ECD_ENTRIES_THIS_DISK] = dircount
    endrec[_ECD_ENTRIES_TOTAL] = dircount2
    endrec[_ECD_SIZE] = dirsize
    endrec[_ECD_OFFSET] = diroffset
    return endrec
194
195
def _EndRecData(fpin):
    """Return data from the "End of Central Directory" record, or None.

    The data is a list of the nine items in the ZIP "End of central dir"
    record followed by a tenth item, the file seek offset of this record.

    None is returned when no valid record can be located, including when
    the file is too short to hold one."""

    # Determine file size
    fpin.seek(0, 2)
    filesize = fpin.tell()

    # Check to see if this is ZIP file with no archive comment (the
    # "end of central directory" structure should be the last item in the
    # file if this is the case).
    try:
        fpin.seek(-sizeEndCentDir, 2)
    except IOError:
        return None
    data = fpin.read()
    # The length check protects struct.unpack from file-like objects that
    # accept the negative seek on a too-short file instead of raising.
    if (len(data) == sizeEndCentDir and
            data[0:4] == stringEndArchive and
            data[-2:] == "\000\000"):
        # the signature is correct and there's no comment, unpack structure
        endrec = list(struct.unpack(structEndArchive, data))

        # Append a blank comment and record start offset
        endrec.append("")
        endrec.append(filesize - sizeEndCentDir)

        # Try to read the "Zip64 end of central directory" structure
        return _EndRecData64(fpin, -sizeEndCentDir, endrec)

    # Either this is not a ZIP file, or it is a ZIP file with an archive
    # comment.  Search the end of the file for the "end of central directory"
    # record signature. The comment is the last item in the ZIP file and may be
    # up to 64K long.  It is assumed that the "end of central directory" magic
    # number does not appear in the comment.
    maxCommentStart = max(filesize - (1 << 16) - sizeEndCentDir, 0)
    fpin.seek(maxCommentStart, 0)
    data = fpin.read()
    start = data.rfind(stringEndArchive)
    if start >= 0:
        # found the magic number; attempt to unpack and interpret
        recData = data[start:start+sizeEndCentDir]
        if len(recData) != sizeEndCentDir:
            # Signature sits too close to EOF to hold a complete record.
            return None
        endrec = list(struct.unpack(structEndArchive, recData))
        comment = data[start+sizeEndCentDir:]
        # check that comment length is correct
        if endrec[_ECD_COMMENT_SIZE] == len(comment):
            # Append the archive comment and start offset
            endrec.append(comment)
            endrec.append(maxCommentStart + start)

            # Try to read the "Zip64 end of central directory" structure
            return _EndRecData64(fpin, maxCommentStart + start - filesize,
                                 endrec)

    # Unable to find a valid end of central directory structure
    return None
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000252
Fred Drake484d7352000-10-02 21:14:52 +0000253
class ZipInfo (object):
    """Class with attributes describing each file in the ZIP archive."""

    __slots__ = (
            'orig_filename',
            'filename',
            'date_time',
            'compress_type',
            'comment',
            'extra',
            'create_system',
            'create_version',
            'extract_version',
            'reserved',
            'flag_bits',
            'volume',
            'internal_attr',
            'external_attr',
            'header_offset',
            'CRC',
            'compress_size',
            'file_size',
            '_raw_time',
        )

    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
        """Create an entry description with default header values.

        filename  -- member name; truncated at the first NUL byte and
                     normalized to use "/" as the directory separator.
        date_time -- (year, month, day, hour, min, sec) tuple.
        """
        self.orig_filename = filename   # Original file name in archive

        # Terminate the file name at the first null byte.  Null bytes in file
        # names are used as tricks by viruses in archives.
        null_byte = filename.find(chr(0))
        if null_byte >= 0:
            filename = filename[0:null_byte]
        # This is used to ensure paths in generated ZIP files always use
        # forward slashes as the directory separator, as required by the
        # ZIP format specification.
        if os.sep != "/" and os.sep in filename:
            filename = filename.replace(os.sep, "/")

        self.filename = filename        # Normalized file name
        self.date_time = date_time      # year, month, day, hour, min, sec
        # Standard values:
        self.compress_type = ZIP_STORED # Type of compression for the file
        self.comment = ""               # Comment for each file
        self.extra = ""                 # ZIP extra data
        if sys.platform == 'win32':
            self.create_system = 0          # System which created ZIP archive
        else:
            # Assume everything else is unix-y
            self.create_system = 3          # System which created ZIP archive
        self.create_version = 20        # Version which created ZIP archive
        self.extract_version = 20       # Version needed to extract archive
        self.reserved = 0               # Must be zero
        self.flag_bits = 0              # ZIP flag bits
        self.volume = 0                 # Volume number of file header
        self.internal_attr = 0          # Internal attributes
        self.external_attr = 0          # External file attributes
        # Other attributes are set by class ZipFile:
        # header_offset         Byte offset to the file header
        # CRC                   CRC-32 of the uncompressed file
        # compress_size         Size of the compressed file
        # file_size             Size of the uncompressed file

    def FileHeader(self):
        """Return the per-file header as a string.

        When either size exceeds ZIP64_LIMIT a ZIP64 extra record is
        appended to the returned header and, as a side effect,
        extract_version and create_version are raised to at least 45.
        """
        dt = self.date_time
        # Pack the timestamp into the 16-bit date and time header fields
        # (seconds are stored halved).
        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
        if self.flag_bits & 0x08:
            # Set these to zero because we write them after the file data
            CRC = compress_size = file_size = 0
        else:
            CRC = self.CRC
            compress_size = self.compress_size
            file_size = self.file_size

        extra = self.extra

        if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
            # File is larger than what fits into a 4 byte integer,
            # fall back to the ZIP64 extension
            fmt = '<HHQQ'
            extra = extra + struct.pack(fmt,
                    1, struct.calcsize(fmt)-4, file_size, compress_size)
            file_size = 0xffffffff
            compress_size = 0xffffffff
            self.extract_version = max(45, self.extract_version)
            self.create_version = max(45, self.extract_version)

        filename, flag_bits = self._encodeFilenameFlags()
        header = struct.pack(structFileHeader, stringFileHeader,
                 self.extract_version, self.reserved, flag_bits,
                 self.compress_type, dostime, dosdate, CRC,
                 compress_size, file_size,
                 len(filename), len(extra))
        return header + filename + extra

    def _encodeFilenameFlags(self):
        """Return (encoded filename, flag bits) for writing a header.

        A unicode filename is encoded as ASCII when possible; otherwise it
        is encoded as UTF-8 and bit 0x800 is set in the returned flags.
        Byte-string filenames are returned unchanged.
        """
        if isinstance(self.filename, unicode):
            try:
                return self.filename.encode('ascii'), self.flag_bits
            except UnicodeEncodeError:
                return self.filename.encode('utf-8'), self.flag_bits | 0x800
        else:
            return self.filename, self.flag_bits

    def _decodeFilename(self):
        """Return the filename, decoded from UTF-8 when flag bit 0x800 is set."""
        if self.flag_bits & 0x800:
            return self.filename.decode('utf-8')
        else:
            return self.filename

    def _decodeExtra(self):
        """Apply the ZIP64 records found in self.extra to this entry.

        Each extra record starts with a 4-byte (type, length) header.  For
        type 1 records the 64-bit values replace, in order, whichever of
        file_size, compress_size and header_offset currently hold the
        0xffffffff placeholder.  Fewer than 4 trailing bytes cannot form a
        record header and are ignored.

        Raises RuntimeError when a type 1 record has an unexpected length.
        """
        # Try to decode the extra field.
        extra = self.extra
        unpack = struct.unpack
        while len(extra) >= 4:
            tp, ln = unpack('<HH', extra[:4])
            if tp == 1:
                if ln >= 24:
                    counts = unpack('<QQQ', extra[4:28])
                elif ln == 16:
                    counts = unpack('<QQ', extra[4:20])
                elif ln == 8:
                    counts = unpack('<Q', extra[4:12])
                elif ln == 0:
                    counts = ()
                else:
                    raise RuntimeError("Corrupt extra field %s" % (ln,))

                idx = 0

                # ZIP64 extension (large files and/or large archives)
                if self.file_size in (0xffffffffffffffff, 0xffffffff):
                    self.file_size = counts[idx]
                    idx += 1

                if self.compress_size == 0xffffffff:
                    self.compress_size = counts[idx]
                    idx += 1

                if self.header_offset == 0xffffffff:
                    self.header_offset = counts[idx]
                    idx += 1

            # Skip this record's header and payload.
            extra = extra[ln+4:]
Tim Petersa608bb22006-06-15 18:06:29 +0000401
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000402
Martin v. Löwisc6d626e2007-02-13 09:49:38 +0000403class _ZipDecrypter:
404 """Class to handle decryption of files stored within a ZIP archive.
405
406 ZIP supports a password-based form of encryption. Even though known
407 plaintext attacks have been found against it, it is still useful
Gregory P. Smithda407232008-01-20 01:32:00 +0000408 to be able to get data out of such a file.
Martin v. Löwisc6d626e2007-02-13 09:49:38 +0000409
410 Usage:
411 zd = _ZipDecrypter(mypwd)
412 plain_char = zd(cypher_char)
413 plain_text = map(zd, cypher_text)
414 """
415
416 def _GenerateCRCTable():
417 """Generate a CRC-32 table.
418
419 ZIP encryption uses the CRC32 one-byte primitive for scrambling some
420 internal keys. We noticed that a direct implementation is faster than
421 relying on binascii.crc32().
422 """
423 poly = 0xedb88320
424 table = [0] * 256
425 for i in range(256):
426 crc = i
427 for j in range(8):
428 if crc & 1:
429 crc = ((crc >> 1) & 0x7FFFFFFF) ^ poly
430 else:
431 crc = ((crc >> 1) & 0x7FFFFFFF)
432 table[i] = crc
433 return table
434 crctable = _GenerateCRCTable()
435
436 def _crc32(self, ch, crc):
437 """Compute the CRC32 primitive on one byte."""
438 return ((crc >> 8) & 0xffffff) ^ self.crctable[(crc ^ ord(ch)) & 0xff]
439
440 def __init__(self, pwd):
441 self.key0 = 305419896
442 self.key1 = 591751049
443 self.key2 = 878082192
444 for p in pwd:
445 self._UpdateKeys(p)
446
447 def _UpdateKeys(self, c):
448 self.key0 = self._crc32(c, self.key0)
449 self.key1 = (self.key1 + (self.key0 & 255)) & 4294967295
450 self.key1 = (self.key1 * 134775813 + 1) & 4294967295
451 self.key2 = self._crc32(chr((self.key1 >> 24) & 255), self.key2)
452
453 def __call__(self, c):
454 """Decrypt a single character."""
455 c = ord(c)
456 k = self.key2 | 2
457 c = c ^ (((k * (k^1)) >> 8) & 255)
458 c = chr(c)
459 self._UpdateKeys(c)
460 return c
461
class ZipExtFile(io.BufferedIOBase):
    """File-like object for reading an archive member.
       Is returned by ZipFile.open().
    """

    # Max size supported by decompressor.  The parentheses matter: without
    # them "1 << 31 - 1" is parsed as "1 << 30".
    MAX_N = (1 << 31) - 1

    # Read from compressed files in 4k blocks.
    MIN_READ_SIZE = 4096

    # Search for universal newlines or line chunks.
    PATTERN = re.compile(r'^(?P<chunk>[^\r\n]+)|(?P<newline>\n|\r\n?)')

    def __init__(self, fileobj, mode, zipinfo, decrypter=None):
        """Wrap *fileobj* to read the member described by *zipinfo*.

        fileobj   -- object whose read() yields the member's raw bytes.
        mode      -- mode string; a 'U' in it enables universal newlines.
        zipinfo   -- object providing compress_type, compress_size,
                     filename and, optionally, CRC.
        decrypter -- optional callable applied to every raw character.
        """
        self._fileobj = fileobj
        self._decrypter = decrypter

        self._compress_type = zipinfo.compress_type
        self._compress_size = zipinfo.compress_size
        self._compress_left = zipinfo.compress_size

        if self._compress_type == ZIP_DEFLATED:
            self._decompressor = zlib.decompressobj(-15)
        self._unconsumed = ''

        self._readbuffer = ''
        self._offset = 0

        self._universal = 'U' in mode
        self.newlines = None

        # Adjust read size for encrypted files since the first 12 bytes
        # are for the encryption/password information.
        if self._decrypter is not None:
            self._compress_left -= 12

        self.mode = mode
        self.name = zipinfo.filename

        if hasattr(zipinfo, 'CRC'):
            self._expected_crc = zipinfo.CRC
            self._running_crc = crc32(b'') & 0xffffffff
        else:
            self._expected_crc = None

    def readline(self, limit=-1):
        """Read and return a line from the stream.

        If limit is specified, at most limit bytes will be read.
        """

        if not self._universal and limit < 0:
            # Shortcut common case - newline found in buffer.
            i = self._readbuffer.find('\n', self._offset) + 1
            if i > 0:
                line = self._readbuffer[self._offset: i]
                self._offset = i
                return line

        if not self._universal:
            return io.BufferedIOBase.readline(self, limit)

        line = ''
        while limit < 0 or len(line) < limit:
            readahead = self.peek(2)
            if readahead == '':
                return line

            #
            # Search for universal newlines or line chunks.
            #
            # The pattern returns either a line chunk or a newline, but not
            # both. Combined with peek(2), we are assured that the sequence
            # '\r\n' is always retrieved completely and never split into
            # separate newlines - '\r', '\n' due to coincidental readaheads.
            #
            match = self.PATTERN.search(readahead)
            newline = match.group('newline')
            if newline is not None:
                if self.newlines is None:
                    self.newlines = []
                if newline not in self.newlines:
                    self.newlines.append(newline)
                self._offset += len(newline)
                return line + '\n'

            chunk = match.group('chunk')
            if limit >= 0:
                chunk = chunk[: limit - len(line)]

            self._offset += len(chunk)
            line += chunk

        return line

    def peek(self, n=1):
        """Returns buffered bytes without advancing the position."""
        if n > len(self._readbuffer) - self._offset:
            # Not enough buffered: read to refill the buffer, then rewind
            # over the bytes read() just consumed.
            chunk = self.read(n)
            self._offset -= len(chunk)

        # Return up to 512 bytes to reduce allocation overhead for tight loops.
        return self._readbuffer[self._offset: self._offset + 512]

    def readable(self):
        """Return True: this object supports reading."""
        return True

    def read(self, n=-1):
        """Read and return up to n bytes.
        If the argument is omitted, None, or negative, data is read and
        returned until EOF is reached.
        """
        if n is None:
            n = -1
        # Collect the pieces and join once instead of repeatedly
        # concatenating onto a growing string.
        chunks = []
        total = 0
        while True:
            if n < 0:
                data = self.read1(n)
            elif n > total:
                data = self.read1(n - total)
            else:
                break
            if len(data) == 0:
                break
            chunks.append(data)
            total += len(data)
        return ''.join(chunks)

    def _update_crc(self, newdata, eof):
        """Fold *newdata* into the running CRC and verify it at end of file.

        Raises BadZipfile when *eof* is true and the running CRC differs
        from the expected one.
        """
        # Update the CRC using the given data.
        if self._expected_crc is None:
            # No need to compute the CRC if we don't have a reference value
            return
        self._running_crc = crc32(newdata, self._running_crc) & 0xffffffff
        # Check the CRC if we're at the end of the file
        if eof and self._running_crc != self._expected_crc:
            raise BadZipfile("Bad CRC-32 for file %r" % self.name)

    def read1(self, n):
        """Read up to n bytes with at most one read() system call."""

        # Simplify algorithm (branching) by transforming negative n to large n.
        # None is tested first so it is never compared against an integer.
        if n is None or n < 0:
            n = self.MAX_N

        # Bytes available in read buffer.
        len_readbuffer = len(self._readbuffer) - self._offset

        # Read from file.
        if self._compress_left > 0 and n > len_readbuffer + len(self._unconsumed):
            nbytes = n - len_readbuffer - len(self._unconsumed)
            nbytes = max(nbytes, self.MIN_READ_SIZE)
            nbytes = min(nbytes, self._compress_left)

            data = self._fileobj.read(nbytes)
            self._compress_left -= len(data)

            if data and self._decrypter is not None:
                data = ''.join(map(self._decrypter, data))

            if self._compress_type == ZIP_STORED:
                self._update_crc(data, eof=(self._compress_left==0))
                self._readbuffer = self._readbuffer[self._offset:] + data
                self._offset = 0
            else:
                # Prepare deflated bytes for decompression.
                self._unconsumed += data

        # Handle unconsumed data.
        if (len(self._unconsumed) > 0 and n > len_readbuffer and
            self._compress_type == ZIP_DEFLATED):
            data = self._decompressor.decompress(
                self._unconsumed,
                max(n - len_readbuffer, self.MIN_READ_SIZE)
            )

            self._unconsumed = self._decompressor.unconsumed_tail
            eof = len(self._unconsumed) == 0 and self._compress_left == 0
            if eof:
                data += self._decompressor.flush()

            self._update_crc(data, eof=eof)
            self._readbuffer = self._readbuffer[self._offset:] + data
            self._offset = 0

        # Read from buffer.
        data = self._readbuffer[self._offset: self._offset + n]
        self._offset += len(data)
        return data
649
Tim Petersea5962f2007-03-12 18:07:52 +0000650
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000651
class ZipFile:
    """Class with methods to open, read, write, close and list ZIP files.

    z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=False)

    file: Either the path to the file, or a file-like object.
          If it is a path, the file will be opened and closed by ZipFile.
    mode: The mode can be either read "r", write "w" or append "a".
    compression: ZIP_STORED (no compression) or ZIP_DEFLATED (requires zlib).
    allowZip64: if True ZipFile will create files with ZIP64 extensions when
                needed, otherwise it will raise an exception when this would
                be necessary.

    """

    fp = None                   # Set here since __del__ checks it

    def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=False):
        """Open the ZIP file with mode read "r", write "w" or append "a".

        Raises RuntimeError for an unknown mode, for an unsupported
        compression method, or when ZIP_DEFLATED is requested while the
        zlib module is unavailable.
        """
        if mode not in ("r", "w", "a"):
            raise RuntimeError('ZipFile() requires mode "r", "w", or "a"')

        if compression == ZIP_DEFLATED:
            if not zlib:
                raise RuntimeError(
                    "Compression requires the (missing) zlib module")
        elif compression != ZIP_STORED:
            raise RuntimeError("That compression method is not supported")

        self._allowZip64 = allowZip64
        self._didModify = False
        self.debug = 0                  # Level of printing: 0 through 3
        self.NameToInfo = {}            # Find file info given name
        self.filelist = []              # List of ZipInfo instances for archive
        self.compression = compression  # Method of compression
        self.mode = key = mode.replace('b', '')[0]
        self.pwd = None
        self.comment = ''

        if not isinstance(file, basestring):
            # We were handed a file-like object; the caller owns it.
            self._filePassed = 1
            self.fp = file
            self.filename = getattr(file, 'name', None)
        else:
            # We were handed a path; open (and later close) it ourselves.
            self._filePassed = 0
            self.filename = file
            modeDict = {'r': 'rb', 'w': 'wb', 'a': 'r+b'}
            try:
                self.fp = open(file, modeDict[mode])
            except IOError:
                if mode != 'a':
                    raise
                # The file could not be opened for update: fall back to
                # opening it in write mode.  self.mode stays "a".
                mode = key = 'w'
                self.fp = open(file, modeDict[mode])

        if key == 'r':
            self._GetContents()
        elif key == 'w':
            # set the modified flag so central directory gets written
            # even if no files are added to the archive
            self._didModify = True
        elif key == 'a':
            try:
                # See if file is a zip file
                self._RealGetContents()
                # seek to start of directory and overwrite
                self.fp.seek(self.start_dir, 0)
            except BadZipfile:
                # file is not a zip file, just append
                self.fp.seek(0, 2)

                # set the modified flag so central directory gets written
                # even if no files are added to the archive
                self._didModify = True
        else:
            if not self._filePassed:
                self.fp.close()
                self.fp = None
            raise RuntimeError('Mode must be "r", "w" or "a"')
735
Ezio Melotti569e61f2009-12-30 06:14:51 +0000736 def __enter__(self):
737 return self
738
739 def __exit__(self, type, value, traceback):
740 self.close()
741
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000742 def _GetContents(self):
Tim Peters7d3bad62001-04-04 18:56:49 +0000743 """Read the directory, making sure we close the file if the format
744 is bad."""
745 try:
746 self._RealGetContents()
747 except BadZipfile:
748 if not self._filePassed:
749 self.fp.close()
750 self.fp = None
751 raise
752
753 def _RealGetContents(self):
Fred Drake484d7352000-10-02 21:14:52 +0000754 """Read in the table of contents for the ZIP file."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000755 fp = self.fp
Georg Brandl86e0c892010-11-26 07:22:28 +0000756 try:
757 endrec = _EndRecData(fp)
758 except IOError:
759 raise BadZipfile("File is not a zip file")
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000760 if not endrec:
761 raise BadZipfile, "File is not a zip file"
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000762 if self.debug > 1:
763 print endrec
Martin v. Löwis8c436412008-07-03 12:51:14 +0000764 size_cd = endrec[_ECD_SIZE] # bytes in central directory
765 offset_cd = endrec[_ECD_OFFSET] # offset of central directory
766 self.comment = endrec[_ECD_COMMENT] # archive comment
767
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000768 # "concat" is zero, unless zip was concatenated to another file
Martin v. Löwis8c436412008-07-03 12:51:14 +0000769 concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
Antoine Pitrouebcd0ce2008-09-05 23:30:23 +0000770 if endrec[_ECD_SIGNATURE] == stringEndArchive64:
771 # If Zip64 extension structures are present, account for them
Martin v. Löwis8c436412008-07-03 12:51:14 +0000772 concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)
773
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000774 if self.debug > 2:
Martin v. Löwis8c436412008-07-03 12:51:14 +0000775 inferred = concat + offset_cd
776 print "given, inferred, offset", offset_cd, inferred, concat
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000777 # self.start_dir: Position of start of central directory
778 self.start_dir = offset_cd + concat
779 fp.seek(self.start_dir, 0)
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000780 data = fp.read(size_cd)
781 fp = cStringIO.StringIO(data)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000782 total = 0
783 while total < size_cd:
Martin v. Löwis8c436412008-07-03 12:51:14 +0000784 centdir = fp.read(sizeCentralDir)
Amaury Forgeot d'Arcae6d2b92008-07-11 21:28:25 +0000785 if centdir[0:4] != stringCentralDir:
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000786 raise BadZipfile, "Bad magic number for central directory"
787 centdir = struct.unpack(structCentralDir, centdir)
788 if self.debug > 2:
789 print centdir
Fred Drake3e038e52001-02-28 17:56:26 +0000790 filename = fp.read(centdir[_CD_FILENAME_LENGTH])
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000791 # Create ZipInfo instance to store file information
792 x = ZipInfo(filename)
Fred Drake3e038e52001-02-28 17:56:26 +0000793 x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
794 x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000795 x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000796 (x.create_version, x.create_system, x.extract_version, x.reserved,
797 x.flag_bits, x.compress_type, t, d,
798 x.CRC, x.compress_size, x.file_size) = centdir[1:12]
799 x.volume, x.internal_attr, x.external_attr = centdir[15:18]
800 # Convert date/time code to (year, month, day, hour, min, sec)
Gregory P. Smith0c63fc22008-01-20 01:21:03 +0000801 x._raw_time = t
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000802 x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
Fred Drake414ca662000-06-13 18:49:53 +0000803 t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000804
805 x._decodeExtra()
806 x.header_offset = x.header_offset + concat
Martin v. Löwis471617d2008-05-05 17:16:58 +0000807 x.filename = x._decodeFilename()
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000808 self.filelist.append(x)
809 self.NameToInfo[x.filename] = x
Martin v. Löwis8c436412008-07-03 12:51:14 +0000810
811 # update total bytes read from central directory
812 total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH]
813 + centdir[_CD_EXTRA_FIELD_LENGTH]
814 + centdir[_CD_COMMENT_LENGTH])
815
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000816 if self.debug > 2:
817 print "total", total
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000818
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000819
820 def namelist(self):
Fred Drake484d7352000-10-02 21:14:52 +0000821 """Return a list of file names in the archive."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000822 l = []
823 for data in self.filelist:
824 l.append(data.filename)
825 return l
826
827 def infolist(self):
Fred Drake484d7352000-10-02 21:14:52 +0000828 """Return a list of class ZipInfo instances for files in the
829 archive."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000830 return self.filelist
831
832 def printdir(self):
Fred Drake484d7352000-10-02 21:14:52 +0000833 """Print a table of contents for the zip file."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000834 print "%-46s %19s %12s" % ("File Name", "Modified ", "Size")
835 for zinfo in self.filelist:
Raymond Hettinger351e1a32008-01-14 22:58:05 +0000836 date = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time[:6]
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000837 print "%-46s %s %12d" % (zinfo.filename, date, zinfo.file_size)
838
839 def testzip(self):
Fred Drake484d7352000-10-02 21:14:52 +0000840 """Read all the files and check the CRC."""
Antoine Pitrouc5342702008-08-17 13:06:29 +0000841 chunk_size = 2 ** 20
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000842 for zinfo in self.filelist:
843 try:
Antoine Pitrouc5342702008-08-17 13:06:29 +0000844 # Read by chunks, to avoid an OverflowError or a
845 # MemoryError with very large embedded files.
846 f = self.open(zinfo.filename, "r")
847 while f.read(chunk_size): # Check CRC-32
848 pass
Raymond Hettingerc0fac962003-06-27 22:25:03 +0000849 except BadZipfile:
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000850 return zinfo.filename
851
852 def getinfo(self, name):
Fred Drake484d7352000-10-02 21:14:52 +0000853 """Return the instance of ZipInfo given 'name'."""
Georg Brandl4b3ab6f2007-07-12 09:59:22 +0000854 info = self.NameToInfo.get(name)
855 if info is None:
856 raise KeyError(
857 'There is no item named %r in the archive' % name)
858
859 return info
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000860
Martin v. Löwisc6d626e2007-02-13 09:49:38 +0000861 def setpassword(self, pwd):
862 """Set default password for encrypted files."""
863 self.pwd = pwd
864
865 def read(self, name, pwd=None):
Fred Drake484d7352000-10-02 21:14:52 +0000866 """Return file bytes (as a string) for name."""
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000867 return self.open(name, "r", pwd).read()
868
869 def open(self, name, mode="r", pwd=None):
870 """Return file-like object for 'name'."""
871 if mode not in ("r", "U", "rU"):
872 raise RuntimeError, 'open() requires mode "r", "U", or "rU"'
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000873 if not self.fp:
874 raise RuntimeError, \
Fred Drake5db246d2000-09-29 20:44:48 +0000875 "Attempt to read ZIP archive that was already closed"
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000876
Tim Petersea5962f2007-03-12 18:07:52 +0000877 # Only open a new file for instances where we were not
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000878 # given a file object in the constructor
879 if self._filePassed:
880 zef_file = self.fp
881 else:
882 zef_file = open(self.filename, 'rb')
883
Georg Brandl112aa502008-05-20 08:25:48 +0000884 # Make sure we have an info object
885 if isinstance(name, ZipInfo):
886 # 'name' is already an info object
887 zinfo = name
888 else:
889 # Get info object for name
890 zinfo = self.getinfo(name)
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000891
892 zef_file.seek(zinfo.header_offset, 0)
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000893
894 # Skip the file header:
Martin v. Löwis8c436412008-07-03 12:51:14 +0000895 fheader = zef_file.read(sizeFileHeader)
Amaury Forgeot d'Arcae6d2b92008-07-11 21:28:25 +0000896 if fheader[0:4] != stringFileHeader:
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000897 raise BadZipfile, "Bad magic number for file header"
898
899 fheader = struct.unpack(structFileHeader, fheader)
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000900 fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000901 if fheader[_FH_EXTRA_FIELD_LENGTH]:
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000902 zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000903
904 if fname != zinfo.orig_filename:
905 raise BadZipfile, \
906 'File name in directory "%s" and header "%s" differ.' % (
907 zinfo.orig_filename, fname)
908
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000909 # check for encrypted flag & handle password
910 is_encrypted = zinfo.flag_bits & 0x1
911 zd = None
Martin v. Löwisc6d626e2007-02-13 09:49:38 +0000912 if is_encrypted:
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000913 if not pwd:
914 pwd = self.pwd
915 if not pwd:
916 raise RuntimeError, "File %s is encrypted, " \
917 "password required for extraction" % name
918
Martin v. Löwisc6d626e2007-02-13 09:49:38 +0000919 zd = _ZipDecrypter(pwd)
920 # The first 12 bytes in the cypher stream is an encryption header
921 # used to strengthen the algorithm. The first 11 bytes are
922 # completely random, while the 12th contains the MSB of the CRC,
Gregory P. Smith0c63fc22008-01-20 01:21:03 +0000923 # or the MSB of the file time depending on the header type
Martin v. Löwisc6d626e2007-02-13 09:49:38 +0000924 # and is used to check the correctness of the password.
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000925 bytes = zef_file.read(12)
Martin v. Löwisc6d626e2007-02-13 09:49:38 +0000926 h = map(zd, bytes[0:12])
Gregory P. Smith0c63fc22008-01-20 01:21:03 +0000927 if zinfo.flag_bits & 0x8:
928 # compare against the file type from extended local headers
929 check_byte = (zinfo._raw_time >> 8) & 0xff
930 else:
931 # compare against the CRC otherwise
932 check_byte = (zinfo.CRC >> 24) & 0xff
933 if ord(h[11]) != check_byte:
934 raise RuntimeError("Bad password for file", name)
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000935
Antoine Pitrou94c33eb2010-01-27 20:59:50 +0000936 return ZipExtFile(zef_file, mode, zinfo, zd)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000937
Georg Brandl62416bc2008-01-07 18:47:44 +0000938 def extract(self, member, path=None, pwd=None):
939 """Extract a member from the archive to the current working directory,
940 using its full name. Its file information is extracted as accurately
941 as possible. `member' may be a filename or a ZipInfo object. You can
942 specify a different directory using `path'.
943 """
944 if not isinstance(member, ZipInfo):
945 member = self.getinfo(member)
946
947 if path is None:
948 path = os.getcwd()
949
950 return self._extract_member(member, path, pwd)
951
952 def extractall(self, path=None, members=None, pwd=None):
953 """Extract all members from the archive to the current working
954 directory. `path' specifies a different directory to extract to.
955 `members' is optional and must be a subset of the list returned
956 by namelist().
957 """
958 if members is None:
959 members = self.namelist()
960
961 for zipinfo in members:
962 self.extract(zipinfo, path, pwd)
963
964 def _extract_member(self, member, targetpath, pwd):
965 """Extract the ZipInfo object 'member' to a physical
966 file on the path targetpath.
967 """
968 # build the destination pathname, replacing
969 # forward slashes to platform specific separators.
Antoine Pitrou97377bf2009-05-04 21:17:17 +0000970 # Strip trailing path separator, unless it represents the root.
971 if (targetpath[-1:] in (os.path.sep, os.path.altsep)
972 and len(os.path.splitdrive(targetpath)[1]) > 1):
Georg Brandl62416bc2008-01-07 18:47:44 +0000973 targetpath = targetpath[:-1]
974
975 # don't include leading "/" from file name if present
Martin v. Löwis0dfcfc82009-01-24 14:00:33 +0000976 if member.filename[0] == '/':
Georg Brandl62416bc2008-01-07 18:47:44 +0000977 targetpath = os.path.join(targetpath, member.filename[1:])
978 else:
979 targetpath = os.path.join(targetpath, member.filename)
980
981 targetpath = os.path.normpath(targetpath)
982
983 # Create all upper directories if necessary.
984 upperdirs = os.path.dirname(targetpath)
985 if upperdirs and not os.path.exists(upperdirs):
986 os.makedirs(upperdirs)
987
Martin v. Löwis0dfcfc82009-01-24 14:00:33 +0000988 if member.filename[-1] == '/':
Martin v. Löwis0b09c422009-05-24 19:30:52 +0000989 if not os.path.isdir(targetpath):
990 os.mkdir(targetpath)
Martin v. Löwis0dfcfc82009-01-24 14:00:33 +0000991 return targetpath
992
Georg Brandl112aa502008-05-20 08:25:48 +0000993 source = self.open(member, pwd=pwd)
Georg Brandl62416bc2008-01-07 18:47:44 +0000994 target = file(targetpath, "wb")
995 shutil.copyfileobj(source, target)
996 source.close()
997 target.close()
998
999 return targetpath
1000
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001001 def _writecheck(self, zinfo):
Fred Drake484d7352000-10-02 21:14:52 +00001002 """Check for errors before writing a file to the archive."""
Raymond Hettinger54f02222002-06-01 14:18:47 +00001003 if zinfo.filename in self.NameToInfo:
Tim Peterse1190062001-01-15 03:34:38 +00001004 if self.debug: # Warning for duplicate names
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001005 print "Duplicate name:", zinfo.filename
1006 if self.mode not in ("w", "a"):
1007 raise RuntimeError, 'write() requires mode "w" or "a"'
1008 if not self.fp:
1009 raise RuntimeError, \
Fred Drake5db246d2000-09-29 20:44:48 +00001010 "Attempt to write ZIP archive that was already closed"
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001011 if zinfo.compress_type == ZIP_DEFLATED and not zlib:
1012 raise RuntimeError, \
Fred Drake5db246d2000-09-29 20:44:48 +00001013 "Compression requires the (missing) zlib module"
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001014 if zinfo.compress_type not in (ZIP_STORED, ZIP_DEFLATED):
1015 raise RuntimeError, \
Fred Drake5db246d2000-09-29 20:44:48 +00001016 "That compression method is not supported"
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001017 if zinfo.file_size > ZIP64_LIMIT:
1018 if not self._allowZip64:
1019 raise LargeZipFile("Filesize would require ZIP64 extensions")
1020 if zinfo.header_offset > ZIP64_LIMIT:
1021 if not self._allowZip64:
1022 raise LargeZipFile("Zipfile size would require ZIP64 extensions")
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001023
1024 def write(self, filename, arcname=None, compress_type=None):
Fred Drake484d7352000-10-02 21:14:52 +00001025 """Put the bytes from filename into the archive under the name
1026 arcname."""
Georg Brandl4b3ab6f2007-07-12 09:59:22 +00001027 if not self.fp:
1028 raise RuntimeError(
1029 "Attempt to write to ZIP archive that was already closed")
1030
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001031 st = os.stat(filename)
Martin v. Löwis0dfcfc82009-01-24 14:00:33 +00001032 isdir = stat.S_ISDIR(st.st_mode)
Raymond Hettinger32200ae2002-06-01 19:51:15 +00001033 mtime = time.localtime(st.st_mtime)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001034 date_time = mtime[0:6]
1035 # Create ZipInfo instance to store file information
1036 if arcname is None:
Georg Brandl8f7c54e2006-02-20 08:40:38 +00001037 arcname = filename
1038 arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
1039 while arcname[0] in (os.sep, os.altsep):
1040 arcname = arcname[1:]
Martin v. Löwis0dfcfc82009-01-24 14:00:33 +00001041 if isdir:
1042 arcname += '/'
Georg Brandl8f7c54e2006-02-20 08:40:38 +00001043 zinfo = ZipInfo(arcname, date_time)
Andrew M. Kuchling55430212004-07-10 15:40:29 +00001044 zinfo.external_attr = (st[0] & 0xFFFF) << 16L # Unix attributes
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001045 if compress_type is None:
Tim Peterse1190062001-01-15 03:34:38 +00001046 zinfo.compress_type = self.compression
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001047 else:
Tim Peterse1190062001-01-15 03:34:38 +00001048 zinfo.compress_type = compress_type
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001049
1050 zinfo.file_size = st.st_size
Finn Bock03a3bb82001-09-05 18:40:33 +00001051 zinfo.flag_bits = 0x00
Tim Peterse1190062001-01-15 03:34:38 +00001052 zinfo.header_offset = self.fp.tell() # Start of header bytes
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001053
1054 self._writecheck(zinfo)
1055 self._didModify = True
Martin v. Löwis0dfcfc82009-01-24 14:00:33 +00001056
1057 if isdir:
1058 zinfo.file_size = 0
1059 zinfo.compress_size = 0
1060 zinfo.CRC = 0
1061 self.filelist.append(zinfo)
1062 self.NameToInfo[zinfo.filename] = zinfo
1063 self.fp.write(zinfo.FileHeader())
1064 return
1065
Benjamin Petersonb91e8ed2009-05-10 02:29:00 +00001066 with open(filename, "rb") as fp:
1067 # Must overwrite CRC and sizes with correct data later
1068 zinfo.CRC = CRC = 0
1069 zinfo.compress_size = compress_size = 0
1070 zinfo.file_size = file_size = 0
1071 self.fp.write(zinfo.FileHeader())
1072 if zinfo.compress_type == ZIP_DEFLATED:
1073 cmpr = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
1074 zlib.DEFLATED, -15)
1075 else:
1076 cmpr = None
1077 while 1:
1078 buf = fp.read(1024 * 8)
1079 if not buf:
1080 break
1081 file_size = file_size + len(buf)
1082 CRC = crc32(buf, CRC) & 0xffffffff
1083 if cmpr:
1084 buf = cmpr.compress(buf)
1085 compress_size = compress_size + len(buf)
1086 self.fp.write(buf)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001087 if cmpr:
1088 buf = cmpr.flush()
1089 compress_size = compress_size + len(buf)
1090 self.fp.write(buf)
1091 zinfo.compress_size = compress_size
1092 else:
1093 zinfo.compress_size = file_size
1094 zinfo.CRC = CRC
1095 zinfo.file_size = file_size
Finn Bock03a3bb82001-09-05 18:40:33 +00001096 # Seek backwards and write CRC and file sizes
Tim Petersb64bec32001-09-18 02:26:39 +00001097 position = self.fp.tell() # Preserve current position in file
Finn Bock03a3bb82001-09-05 18:40:33 +00001098 self.fp.seek(zinfo.header_offset + 14, 0)
Gregory P. Smithbf02e3b2008-03-19 03:14:41 +00001099 self.fp.write(struct.pack("<LLL", zinfo.CRC, zinfo.compress_size,
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001100 zinfo.file_size))
Finn Bock03a3bb82001-09-05 18:40:33 +00001101 self.fp.seek(position, 0)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001102 self.filelist.append(zinfo)
1103 self.NameToInfo[zinfo.filename] = zinfo
1104
Ronald Oussorendd25e862010-02-07 20:18:02 +00001105 def writestr(self, zinfo_or_arcname, bytes, compress_type=None):
Fred Drake484d7352000-10-02 21:14:52 +00001106 """Write a file into the archive. The contents is the string
Just van Rossumb083cb32002-12-12 12:23:32 +00001107 'bytes'. 'zinfo_or_arcname' is either a ZipInfo instance or
1108 the name of the file in the archive."""
1109 if not isinstance(zinfo_or_arcname, ZipInfo):
1110 zinfo = ZipInfo(filename=zinfo_or_arcname,
Raymond Hettinger351e1a32008-01-14 22:58:05 +00001111 date_time=time.localtime(time.time())[:6])
Ronald Oussorendd25e862010-02-07 20:18:02 +00001112
Just van Rossumb083cb32002-12-12 12:23:32 +00001113 zinfo.compress_type = self.compression
Antoine Pitrou5fdfa3e2008-07-25 19:42:26 +00001114 zinfo.external_attr = 0600 << 16
Just van Rossumb083cb32002-12-12 12:23:32 +00001115 else:
1116 zinfo = zinfo_or_arcname
Georg Brandl4b3ab6f2007-07-12 09:59:22 +00001117
1118 if not self.fp:
1119 raise RuntimeError(
1120 "Attempt to write to ZIP archive that was already closed")
1121
Ronald Oussorendd25e862010-02-07 20:18:02 +00001122 if compress_type is not None:
1123 zinfo.compress_type = compress_type
1124
Tim Peterse1190062001-01-15 03:34:38 +00001125 zinfo.file_size = len(bytes) # Uncompressed size
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001126 zinfo.header_offset = self.fp.tell() # Start of header bytes
1127 self._writecheck(zinfo)
1128 self._didModify = True
Gregory P. Smithbf02e3b2008-03-19 03:14:41 +00001129 zinfo.CRC = crc32(bytes) & 0xffffffff # CRC-32 checksum
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001130 if zinfo.compress_type == ZIP_DEFLATED:
1131 co = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
1132 zlib.DEFLATED, -15)
1133 bytes = co.compress(bytes) + co.flush()
Tim Peterse1190062001-01-15 03:34:38 +00001134 zinfo.compress_size = len(bytes) # Compressed size
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001135 else:
1136 zinfo.compress_size = zinfo.file_size
Tim Peterse1190062001-01-15 03:34:38 +00001137 zinfo.header_offset = self.fp.tell() # Start of header bytes
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001138 self.fp.write(zinfo.FileHeader())
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001139 self.fp.write(bytes)
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001140 self.fp.flush()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001141 if zinfo.flag_bits & 0x08:
Tim Peterse1190062001-01-15 03:34:38 +00001142 # Write CRC and file sizes after the file data
Gregory P. Smith26627332009-06-26 07:50:21 +00001143 self.fp.write(struct.pack("<LLL", zinfo.CRC, zinfo.compress_size,
Tim Peterse1190062001-01-15 03:34:38 +00001144 zinfo.file_size))
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001145 self.filelist.append(zinfo)
1146 self.NameToInfo[zinfo.filename] = zinfo
1147
1148 def __del__(self):
Fred Drake484d7352000-10-02 21:14:52 +00001149 """Call the "close()" method in case the user forgot."""
Tim Petersd15f8bb2001-11-28 23:16:40 +00001150 self.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001151
1152 def close(self):
Fred Drake484d7352000-10-02 21:14:52 +00001153 """Close the file, and for mode "w" and "a" write the ending
1154 records."""
Tim Petersd15f8bb2001-11-28 23:16:40 +00001155 if self.fp is None:
1156 return
Tim Petersa608bb22006-06-15 18:06:29 +00001157
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001158 if self.mode in ("w", "a") and self._didModify: # write ending records
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001159 count = 0
1160 pos1 = self.fp.tell()
Tim Peterse1190062001-01-15 03:34:38 +00001161 for zinfo in self.filelist: # write central directory
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001162 count = count + 1
1163 dt = zinfo.date_time
1164 dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
Tim Peters3caca232001-12-06 06:23:26 +00001165 dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001166 extra = []
1167 if zinfo.file_size > ZIP64_LIMIT \
1168 or zinfo.compress_size > ZIP64_LIMIT:
1169 extra.append(zinfo.file_size)
1170 extra.append(zinfo.compress_size)
Martin v. Löwis8c436412008-07-03 12:51:14 +00001171 file_size = 0xffffffff
1172 compress_size = 0xffffffff
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001173 else:
1174 file_size = zinfo.file_size
1175 compress_size = zinfo.compress_size
1176
1177 if zinfo.header_offset > ZIP64_LIMIT:
1178 extra.append(zinfo.header_offset)
Martin v. Löwis8c436412008-07-03 12:51:14 +00001179 header_offset = 0xffffffffL
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001180 else:
1181 header_offset = zinfo.header_offset
1182
1183 extra_data = zinfo.extra
1184 if extra:
1185 # Append a ZIP64 field to the extra's
1186 extra_data = struct.pack(
Gregory P. Smithb89a0962008-03-19 01:46:10 +00001187 '<HH' + 'Q'*len(extra),
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001188 1, 8*len(extra), *extra) + extra_data
Tim Petersa608bb22006-06-15 18:06:29 +00001189
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001190 extract_version = max(45, zinfo.extract_version)
1191 create_version = max(45, zinfo.create_version)
1192 else:
1193 extract_version = zinfo.extract_version
1194 create_version = zinfo.create_version
1195
Gregory P. Smithbf02e3b2008-03-19 03:14:41 +00001196 try:
Martin v. Löwis471617d2008-05-05 17:16:58 +00001197 filename, flag_bits = zinfo._encodeFilenameFlags()
Gregory P. Smithbf02e3b2008-03-19 03:14:41 +00001198 centdir = struct.pack(structCentralDir,
Amaury Forgeot d'Arcae6d2b92008-07-11 21:28:25 +00001199 stringCentralDir, create_version,
Gregory P. Smithbf02e3b2008-03-19 03:14:41 +00001200 zinfo.create_system, extract_version, zinfo.reserved,
Martin v. Löwis471617d2008-05-05 17:16:58 +00001201 flag_bits, zinfo.compress_type, dostime, dosdate,
Gregory P. Smithbf02e3b2008-03-19 03:14:41 +00001202 zinfo.CRC, compress_size, file_size,
Martin v. Löwis471617d2008-05-05 17:16:58 +00001203 len(filename), len(extra_data), len(zinfo.comment),
Gregory P. Smithbf02e3b2008-03-19 03:14:41 +00001204 0, zinfo.internal_attr, zinfo.external_attr,
1205 header_offset)
1206 except DeprecationWarning:
1207 print >>sys.stderr, (structCentralDir,
1208 stringCentralDir, create_version,
1209 zinfo.create_system, extract_version, zinfo.reserved,
1210 zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
1211 zinfo.CRC, compress_size, file_size,
1212 len(zinfo.filename), len(extra_data), len(zinfo.comment),
1213 0, zinfo.internal_attr, zinfo.external_attr,
1214 header_offset)
1215 raise
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001216 self.fp.write(centdir)
Martin v. Löwis471617d2008-05-05 17:16:58 +00001217 self.fp.write(filename)
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001218 self.fp.write(extra_data)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001219 self.fp.write(zinfo.comment)
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001220
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001221 pos2 = self.fp.tell()
1222 # Write end-of-zip-archive record
Amaury Forgeot d'Arcd25f87a2009-01-17 16:40:17 +00001223 centDirCount = count
1224 centDirSize = pos2 - pos1
Martin v. Löwis8c436412008-07-03 12:51:14 +00001225 centDirOffset = pos1
Amaury Forgeot d'Arcd25f87a2009-01-17 16:40:17 +00001226 if (centDirCount >= ZIP_FILECOUNT_LIMIT or
1227 centDirOffset > ZIP64_LIMIT or
1228 centDirSize > ZIP64_LIMIT):
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001229 # Need to write the ZIP64 end-of-archive records
1230 zip64endrec = struct.pack(
Amaury Forgeot d'Arcae6d2b92008-07-11 21:28:25 +00001231 structEndArchive64, stringEndArchive64,
Amaury Forgeot d'Arcd25f87a2009-01-17 16:40:17 +00001232 44, 45, 45, 0, 0, centDirCount, centDirCount,
1233 centDirSize, centDirOffset)
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001234 self.fp.write(zip64endrec)
1235
1236 zip64locrec = struct.pack(
Amaury Forgeot d'Arcae6d2b92008-07-11 21:28:25 +00001237 structEndArchive64Locator,
1238 stringEndArchive64Locator, 0, pos2, 1)
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001239 self.fp.write(zip64locrec)
Amaury Forgeot d'Arcd25f87a2009-01-17 16:40:17 +00001240 centDirCount = min(centDirCount, 0xFFFF)
1241 centDirSize = min(centDirSize, 0xFFFFFFFF)
1242 centDirOffset = min(centDirOffset, 0xFFFFFFFF)
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001243
Martin v. Löwis8c436412008-07-03 12:51:14 +00001244 # check for valid comment length
1245 if len(self.comment) >= ZIP_MAX_COMMENT:
1246 if self.debug > 0:
1247 msg = 'Archive comment is too long; truncating to %d bytes' \
1248 % ZIP_MAX_COMMENT
1249 self.comment = self.comment[:ZIP_MAX_COMMENT]
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001250
Amaury Forgeot d'Arcae6d2b92008-07-11 21:28:25 +00001251 endrec = struct.pack(structEndArchive, stringEndArchive,
Amaury Forgeot d'Arcd25f87a2009-01-17 16:40:17 +00001252 0, 0, centDirCount, centDirCount,
1253 centDirSize, centDirOffset, len(self.comment))
Martin v. Löwis8c436412008-07-03 12:51:14 +00001254 self.fp.write(endrec)
1255 self.fp.write(self.comment)
Guido van Rossumf85af612001-04-14 16:45:14 +00001256 self.fp.flush()
Martin v. Löwis8c436412008-07-03 12:51:14 +00001257
Fred Drake3d9091e2001-03-26 15:49:24 +00001258 if not self._filePassed:
1259 self.fp.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001260 self.fp = None
1261
1262
class PyZipFile(ZipFile):
    """Class to create ZIP archives with Python library files and packages."""

    def writepy(self, pathname, basename=""):
        """Add all files from "pathname" to the ZIP archive.

        If pathname is a package directory (it contains __init__.py),
        add the package and, recursively, every sub-package, storing
        the modules under "basename/<package name>/".  If pathname is
        a plain directory, add every *.py module found directly in it
        (no recursion) under "basename".  Otherwise pathname must be a
        single *.py file, which is added under "basename".

        Modules are stored as module.pyo or module.pyc, compiling
        module.py into module.pyc first when the bytecode is missing
        or older than the source.  If that compilation fails, the
        error is printed and the module.py source is stored instead.

        Raises RuntimeError if pathname is neither a directory nor a
        path ending in ".py".
        """
        name = os.path.split(pathname)[1]
        if os.path.isdir(pathname):
            initname = os.path.join(pathname, "__init__.py")
            if os.path.isfile(initname):
                # This is a package directory, add it
                if basename:
                    basename = "%s/%s" % (basename, name)
                else:
                    basename = name
                if self.debug:
                    print("Adding package in %s as %s" % (pathname, basename))
                fname, arcname = self._get_codename(initname[0:-3], basename)
                if self.debug:
                    print("Adding %s" % arcname)
                self.write(fname, arcname)
                dirlist = os.listdir(pathname)
                # __init__.py has just been added; do not add it twice.
                dirlist.remove("__init__.py")
                # Add all *.py files and package subdirectories
                for filename in dirlist:
                    path = os.path.join(pathname, filename)
                    ext = os.path.splitext(filename)[1]
                    if os.path.isdir(path):
                        if os.path.isfile(os.path.join(path, "__init__.py")):
                            # This is a package directory, add it
                            self.writepy(path, basename)  # Recursive call
                    elif ext == ".py":
                        fname, arcname = self._get_codename(path[0:-3],
                                                            basename)
                        if self.debug:
                            print("Adding %s" % arcname)
                        self.write(fname, arcname)
            else:
                # This is NOT a package directory, add its files at top level
                if self.debug:
                    print("Adding files from directory %s" % pathname)
                for filename in os.listdir(pathname):
                    path = os.path.join(pathname, filename)
                    ext = os.path.splitext(filename)[1]
                    if ext == ".py":
                        fname, arcname = self._get_codename(path[0:-3],
                                                            basename)
                        if self.debug:
                            print("Adding %s" % arcname)
                        self.write(fname, arcname)
        else:
            if pathname[-3:] != ".py":
                raise RuntimeError(
                    'Files added with writepy() must end with ".py"')
            fname, arcname = self._get_codename(pathname[0:-3], basename)
            if self.debug:
                print("Adding file %s" % arcname)
            self.write(fname, arcname)

    def _get_codename(self, pathname, basename):
        """Return (filename, archivename) for the path.

        Given a module name path (no extension), return the path of
        the file to store and the name to store it under, compiling if
        necessary.  For example, given /python/lib/string, return
        (/python/lib/string.pyc, string.pyc).  When basename is
        non-empty the archive name is prefixed with "basename/".

        An up-to-date .pyo is preferred, then an up-to-date .pyc;
        otherwise the source is compiled to .pyc.  If compilation
        fails, the error message is printed and the .py source file is
        returned instead, so the caller never receives a path to
        bytecode that is missing or does not match the source.
        """
        file_py = pathname + ".py"
        file_pyc = pathname + ".pyc"
        file_pyo = pathname + ".pyo"
        if (os.path.isfile(file_pyo) and
                os.stat(file_pyo).st_mtime >= os.stat(file_py).st_mtime):
            fname = file_pyo    # Use .pyo file
        elif (os.path.isfile(file_pyc) and
                os.stat(file_pyc).st_mtime >= os.stat(file_py).st_mtime):
            fname = file_pyc    # Existing .pyc is current
        else:
            # Bytecode is missing or older than the source: (re)compile.
            import py_compile
            if self.debug:
                print("Compiling %s" % file_py)
            try:
                py_compile.compile(file_py, file_pyc, None, True)
            except py_compile.PyCompileError as err:
                print(err.msg)
                # No valid bytecode could be produced; store the source.
                fname = file_py
            else:
                fname = file_pyc
        archivename = os.path.split(fname)[1]
        if basename:
            archivename = "%s/%s" % (basename, archivename)
        return (fname, archivename)
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001360
1361
def main(args=None):
    """Command-line interface: list, test, extract or create a ZIP file.

    args is the argument list without the program name; when None,
    sys.argv[1:] is used.  The first argument selects the mode:

        -l zipfile.zip          print a listing of the archive
        -t zipfile.zip          test the archive's members
        -e zipfile.zip target   extract the archive into directory target
        -c zipfile.zip src ...  create the archive from files/directories

    On a missing/unknown mode or a wrong number of arguments the usage
    text is printed and sys.exit(1) is called.

    The archive is always closed, even when an operation raises.  When
    extracting, directory entries are created as directories, and
    entries whose name would resolve outside the target directory
    (absolute names, ".." components) are reported and skipped.
    """
    import textwrap
    USAGE = textwrap.dedent("""\
        Usage:
            zipfile.py -l zipfile.zip        # Show listing of a zipfile
            zipfile.py -t zipfile.zip        # Test if a zipfile is valid
            zipfile.py -e zipfile.zip target # Extract zipfile into target dir
            zipfile.py -c zipfile.zip src ... # Create zipfile from sources
        """)
    if args is None:
        args = sys.argv[1:]

    if not args or args[0] not in ('-l', '-c', '-e', '-t'):
        print(USAGE)
        sys.exit(1)

    if args[0] == '-l':
        if len(args) != 2:
            print(USAGE)
            sys.exit(1)
        zf = ZipFile(args[1], 'r')
        try:
            zf.printdir()
        finally:
            zf.close()

    elif args[0] == '-t':
        if len(args) != 2:
            print(USAGE)
            sys.exit(1)
        zf = ZipFile(args[1], 'r')
        try:
            badfile = zf.testzip()
        finally:
            zf.close()
        if badfile:
            print("The following enclosed file is corrupted: {!r}".format(badfile))
        print("Done testing")

    elif args[0] == '-e':
        if len(args) != 3:
            print(USAGE)
            sys.exit(1)

        out = args[2]
        out_root = os.path.abspath(out)
        # Trailing separator so that "/x/out" does not match "/x/outside".
        out_prefix = os.path.join(out_root, '')
        zf = ZipFile(args[1], 'r')
        try:
            for path in zf.namelist():
                if path.startswith('./'):
                    tgt = os.path.join(out, path[2:])
                else:
                    tgt = os.path.join(out, path)

                is_dir_entry = path.endswith('/')
                resolved = os.path.abspath(tgt)
                inside = (resolved.startswith(out_prefix) or
                          (is_dir_entry and resolved == out_root))
                if not inside:
                    # Never write outside the requested target directory.
                    print("Skipping entry outside target directory: {!r}".format(path))
                    continue

                if is_dir_entry:
                    # Directory entries carry no data; just create them.
                    if not os.path.isdir(tgt):
                        os.makedirs(tgt)
                    continue

                tgtdir = os.path.dirname(tgt)
                if not os.path.exists(tgtdir):
                    os.makedirs(tgtdir)
                with open(tgt, 'wb') as fp:
                    fp.write(zf.read(path))
        finally:
            zf.close()

    elif args[0] == '-c':
        if len(args) < 3:
            print(USAGE)
            sys.exit(1)

        def addToZip(zf, path, zippath):
            # Store a file under zippath, or recurse into a directory.
            if os.path.isfile(path):
                zf.write(path, zippath, ZIP_DEFLATED)
            elif os.path.isdir(path):
                for nm in os.listdir(path):
                    addToZip(zf,
                             os.path.join(path, nm), os.path.join(zippath, nm))
            # else: ignore

        zf = ZipFile(args[1], 'w', allowZip64=True)
        try:
            for src in args[2:]:
                addToZip(zf, src, os.path.basename(src))
        finally:
            zf.close()
1435
# Run the command-line interface only when executed as a script,
# not when the module is imported.
if __name__ == "__main__":
    main()