blob: f340aa65758d55d9a2a9b483d892e3969d35e264 [file] [log] [blame]
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001"""
2Read and write ZIP files.
3"""
Georg Brandl62416bc2008-01-07 18:47:44 +00004import struct, os, time, sys, shutil
Martin v. Löwis0dfcfc82009-01-24 14:00:33 +00005import binascii, cStringIO, stat
Antoine Pitrou94c33eb2010-01-27 20:59:50 +00006import io
7import re
Guido van Rossum32abe6f2000-03-31 17:30:02 +00008
9try:
Tim Peterse1190062001-01-15 03:34:38 +000010 import zlib # We may need its compression method
Gregory P. Smithb89a0962008-03-19 01:46:10 +000011 crc32 = zlib.crc32
Guido van Rossum9c673f32001-04-10 15:37:12 +000012except ImportError:
Guido van Rossum32abe6f2000-03-31 17:30:02 +000013 zlib = None
Gregory P. Smithb89a0962008-03-19 01:46:10 +000014 crc32 = binascii.crc32
Guido van Rossum32abe6f2000-03-31 17:30:02 +000015
Skip Montanaro40fc1602001-03-01 04:27:19 +000016__all__ = ["BadZipfile", "error", "ZIP_STORED", "ZIP_DEFLATED", "is_zipfile",
Ronald Oussoren143cefb2006-06-15 08:14:18 +000017 "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile" ]
Skip Montanaro40fc1602001-03-01 04:27:19 +000018
class BadZipfile(Exception):
    """Error raised by this module for ZIP data it cannot process.

    Examples in this file: a CRC-32 mismatch while reading a member, or an
    archive that spans multiple disks.
    """
Ronald Oussoren143cefb2006-06-15 08:14:18 +000021
22
class LargeZipFile(Exception):
    """Signal that ZIP64 extensions are needed but not enabled.

    Raised while writing a zipfile when the data would require the ZIP64
    extensions and those extensions are disabled.
    """
28
Tim Peterse1190062001-01-15 03:34:38 +000029error = BadZipfile # The exception raised by this module
Guido van Rossum32abe6f2000-03-31 17:30:02 +000030
# Numeric limits of the classic (non-ZIP64) format.  Sizes larger than
# ZIP64_LIMIT make ZipInfo.FileHeader() switch to the ZIP64 extension.
ZIP64_LIMIT = 0x7FFFFFFF
ZIP_FILECOUNT_LIMIT = 0x10000
ZIP_MAX_COMMENT = 0xFFFF

# constants for Zip file compression methods
ZIP_STORED, ZIP_DEFLATED = 0, 8
38# Other ZIP compression methods not supported
39
Martin v. Löwis8c436412008-07-03 12:51:14 +000040# Below are some formats and associated data for reading/writing headers using
41# the struct module. The names and structures of headers/records are those used
42# in the PKWARE description of the ZIP file format:
43# http://www.pkware.com/documents/casestudies/APPNOTE.TXT
44# (URL valid as of January 2008)
Ronald Oussoren143cefb2006-06-15 08:14:18 +000045
Martin v. Löwis8c436412008-07-03 12:51:14 +000046# The "end of central directory" structure, magic number, size, and indices
47# (section V.I in the format document)
structEndArchive = "<4s4H2LH"
stringEndArchive = "PK\005\006"
sizeEndCentDir = struct.calcsize(structEndArchive)

# Positions of the unpacked fields, in structure order.
(_ECD_SIGNATURE,
 _ECD_DISK_NUMBER,
 _ECD_DISK_START,
 _ECD_ENTRIES_THIS_DISK,
 _ECD_ENTRIES_TOTAL,
 _ECD_SIZE,
 _ECD_OFFSET,
 _ECD_COMMENT_SIZE) = range(8)
# These last two indices are not part of the structure as defined in the
# spec, but they are used internally by this module as a convenience
_ECD_COMMENT = 8
_ECD_LOCATION = 9
64
65# The "central directory" structure, magic number, size, and indices
66# of entries in the structure (section V.F in the format document)
structCentralDir = "<4s4B4HL2L5H2L"
stringCentralDir = "PK\001\002"
sizeCentralDir = struct.calcsize(structCentralDir)

# indexes of entries in the central directory structure
(_CD_SIGNATURE,
 _CD_CREATE_VERSION,
 _CD_CREATE_SYSTEM,
 _CD_EXTRACT_VERSION,
 _CD_EXTRACT_SYSTEM,
 _CD_FLAG_BITS,
 _CD_COMPRESS_TYPE,
 _CD_TIME,
 _CD_DATE,
 _CD_CRC,
 _CD_COMPRESSED_SIZE,
 _CD_UNCOMPRESSED_SIZE,
 _CD_FILENAME_LENGTH,
 _CD_EXTRA_FIELD_LENGTH,
 _CD_COMMENT_LENGTH,
 _CD_DISK_NUMBER_START,
 _CD_INTERNAL_FILE_ATTRIBUTES,
 _CD_EXTERNAL_FILE_ATTRIBUTES,
 _CD_LOCAL_HEADER_OFFSET) = range(19)
91
Martin v. Löwis8c436412008-07-03 12:51:14 +000092# The "local file header" structure, magic number, size, and indices
93# (section V.A in the format document)
structFileHeader = "<4s2B4HL2L2H"
stringFileHeader = "PK\003\004"
sizeFileHeader = struct.calcsize(structFileHeader)

# Positions of the unpacked local-file-header fields, in structure order.
(_FH_SIGNATURE,
 _FH_EXTRACT_VERSION,
 _FH_EXTRACT_SYSTEM,
 _FH_GENERAL_PURPOSE_FLAG_BITS,
 _FH_COMPRESSION_METHOD,
 _FH_LAST_MOD_TIME,
 _FH_LAST_MOD_DATE,
 _FH_CRC,
 _FH_COMPRESSED_SIZE,
 _FH_UNCOMPRESSED_SIZE,
 _FH_FILENAME_LENGTH,
 _FH_EXTRA_FIELD_LENGTH) = range(12)
110
Martin v. Löwis8c436412008-07-03 12:51:14 +0000111# The "Zip64 end of central directory locator" structure, magic number, and size
structEndArchive64Locator = "<4sLQL"
stringEndArchive64Locator = "PK\x06\x07"
sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator)

# The "Zip64 end of central directory" record, magic number, size, and indices
# (section V.G in the format document)
structEndArchive64 = "<4sQ2H2L4Q"
stringEndArchive64 = "PK\x06\x06"
sizeEndCentDir64 = struct.calcsize(structEndArchive64)

(_CD64_SIGNATURE,
 _CD64_DIRECTORY_RECSIZE,
 _CD64_CREATE_VERSION,
 _CD64_EXTRACT_VERSION,
 _CD64_DISK_NUMBER,
 _CD64_DISK_NUMBER_START,
 _CD64_NUMBER_ENTRIES_THIS_DISK,
 _CD64_NUMBER_ENTRIES_TOTAL,
 _CD64_DIRECTORY_SIZE,
 _CD64_OFFSET_START_CENTDIR) = range(10)
132
def _check_zipfile(fp):
    """Return True when an "end of central directory" record is found in fp.

    An IOError raised while probing the file is treated as "not a ZIP file".
    """
    try:
        endrec = _EndRecData(fp)
    except IOError:
        return False
    # A non-empty record list means the file has the correct magic number.
    return True if endrec else False
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000140
def is_zipfile(filename):
    """Quickly see if a file is a ZIP file by checking the magic number.

    The filename argument may be a path, or an already open file or
    file-like object (anything with a "read" attribute).  Returns False
    when the file cannot be opened or read.
    """
    is_zip = False
    try:
        if not hasattr(filename, "read"):
            # A path: open it ourselves and make sure it gets closed again.
            with open(filename, "rb") as fp:
                is_zip = _check_zipfile(fp)
        else:
            is_zip = _check_zipfile(fp=filename)
    except IOError:
        pass
    return is_zip
156
def _EndRecData64(fpin, offset, endrec):
    """
    Read the ZIP64 end-of-archive records and use that to update endrec

    fpin is a seekable file object, offset is the position of the classic
    "end of central directory" record relative to the end of the file, and
    endrec is the record list built by _EndRecData().  endrec is updated in
    place and returned; it is returned untouched when no ZIP64 locator or
    ZIP64 record is found.  Raises BadZipfile for multi-disk archives.
    """
    try:
        fpin.seek(offset - sizeEndCentDir64Locator, 2)
    except IOError:
        # If the seek fails, the file is not large enough to contain a ZIP64
        # end-of-archive record, so just return the end record we were given.
        return endrec

    data = fpin.read(sizeEndCentDir64Locator)
    if len(data) != sizeEndCentDir64Locator:
        # Short read (file-like objects may clamp the seek instead of
        # failing): there is no complete locator to unpack.
        return endrec
    sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, data)
    if sig != stringEndArchive64Locator:
        return endrec

    if diskno != 0 or disks != 1:
        raise BadZipfile("zipfiles that span multiple disks are not supported")

    # Assume no 'zip64 extensible data'
    fpin.seek(offset - sizeEndCentDir64Locator - sizeEndCentDir64, 2)
    data = fpin.read(sizeEndCentDir64)
    if len(data) != sizeEndCentDir64:
        # Locator present but the record itself is truncated; keep the
        # classic record instead of failing inside struct.unpack().
        return endrec
    sig, sz, create_version, read_version, disk_num, disk_dir, \
        dircount, dircount2, dirsize, diroffset = \
        struct.unpack(structEndArchive64, data)
    if sig != stringEndArchive64:
        return endrec

    # Update the original endrec using data from the ZIP64 record
    endrec[_ECD_SIGNATURE] = sig
    endrec[_ECD_DISK_NUMBER] = disk_num
    endrec[_ECD_DISK_START] = disk_dir
    endrec[_ECD_ENTRIES_THIS_DISK] = dircount
    endrec[_ECD_ENTRIES_TOTAL] = dircount2
    endrec[_ECD_SIZE] = dirsize
    endrec[_ECD_OFFSET] = diroffset
    return endrec
194
195
def _EndRecData(fpin):
    """Return data from the "End of Central Directory" record, or None.

    The data is a list of the nine items in the ZIP "End of central dir"
    record followed by a tenth item, the file seek offset of this record.
    None is returned when no complete record can be located."""

    # Determine file size
    fpin.seek(0, 2)
    filesize = fpin.tell()

    # Check to see if this is ZIP file with no archive comment (the
    # "end of central directory" structure should be the last item in the
    # file if this is the case).
    try:
        fpin.seek(-sizeEndCentDir, 2)
    except IOError:
        return None
    data = fpin.read()
    # The length check protects struct.unpack() against short reads from
    # file-like objects that clamp the seek instead of raising IOError.
    if (len(data) == sizeEndCentDir and
            data[0:4] == stringEndArchive and
            data[-2:] == "\000\000"):
        # the signature is correct and there's no comment, unpack structure
        endrec = list(struct.unpack(structEndArchive, data))

        # Append a blank comment and record start offset
        endrec.append("")
        endrec.append(filesize - sizeEndCentDir)

        # Try to read the "Zip64 end of central directory" structure
        return _EndRecData64(fpin, -sizeEndCentDir, endrec)

    # Either this is not a ZIP file, or it is a ZIP file with an archive
    # comment.  Search the end of the file for the "end of central directory"
    # record signature. The comment is the last item in the ZIP file and may be
    # up to 64K long.  It is assumed that the "end of central directory" magic
    # number does not appear in the comment.
    maxCommentStart = max(filesize - (1 << 16) - sizeEndCentDir, 0)
    fpin.seek(maxCommentStart, 0)
    data = fpin.read()
    start = data.rfind(stringEndArchive)
    if start >= 0:
        # found the magic number; attempt to unpack and interpret
        recData = data[start:start+sizeEndCentDir]
        if len(recData) != sizeEndCentDir:
            # The signature sits too close to the end of the file for a
            # whole record to follow it: the file is truncated or corrupt.
            return None
        endrec = list(struct.unpack(structEndArchive, recData))
        commentSize = endrec[_ECD_COMMENT_SIZE] #as claimed by the zip file
        comment = data[start+sizeEndCentDir:start+sizeEndCentDir+commentSize]
        endrec.append(comment)
        endrec.append(maxCommentStart + start)

        # Try to read the "Zip64 end of central directory" structure
        return _EndRecData64(fpin, maxCommentStart + start - filesize,
                             endrec)

    # Unable to find a valid end of central directory structure
    return None
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000250
Fred Drake484d7352000-10-02 21:14:52 +0000251
class ZipInfo (object):
    """Class with attributes describing each file in the ZIP archive."""

    __slots__ = (
            'orig_filename',
            'filename',
            'date_time',
            'compress_type',
            'comment',
            'extra',
            'create_system',
            'create_version',
            'extract_version',
            'reserved',
            'flag_bits',
            'volume',
            'internal_attr',
            'external_attr',
            'header_offset',
            'CRC',
            'compress_size',
            'file_size',
            '_raw_time',
        )

    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
        """Create an entry named filename with the given modification time.

        date_time is a (year, month, day, hour, min, sec) tuple.  Raises
        ValueError when the year is before 1980.
        """
        self.orig_filename = filename   # Original file name in archive

        # Terminate the file name at the first null byte.  Null bytes in file
        # names are used as tricks by viruses in archives.
        null_byte = filename.find(chr(0))
        if null_byte >= 0:
            filename = filename[0:null_byte]
        # This is used to ensure paths in generated ZIP files always use
        # forward slashes as the directory separator, as required by the
        # ZIP format specification.
        if os.sep != "/" and os.sep in filename:
            filename = filename.replace(os.sep, "/")

        self.filename = filename        # Normalized file name
        self.date_time = date_time      # year, month, day, hour, min, sec

        if date_time[0] < 1980:
            raise ValueError('ZIP does not support timestamps before 1980')

        # Standard values:
        self.compress_type = ZIP_STORED # Type of compression for the file
        self.comment = ""               # Comment for each file
        self.extra = ""                 # ZIP extra data
        if sys.platform == 'win32':
            self.create_system = 0          # System which created ZIP archive
        else:
            # Assume everything else is unix-y
            self.create_system = 3          # System which created ZIP archive
        self.create_version = 20        # Version which created ZIP archive
        self.extract_version = 20       # Version needed to extract archive
        self.reserved = 0               # Must be zero
        self.flag_bits = 0              # ZIP flag bits
        self.volume = 0                 # Volume number of file header
        self.internal_attr = 0          # Internal attributes
        self.external_attr = 0          # External file attributes
        # Other attributes are set by class ZipFile:
        # header_offset         Byte offset to the file header
        # CRC                   CRC-32 of the uncompressed file
        # compress_size         Size of the compressed file
        # file_size             Size of the uncompressed file

    def FileHeader(self, zip64=None):
        """Return the per-file header as a string.

        zip64 selects whether a ZIP64 extra record is appended: None (the
        default) decides from the file sizes, a true value forces it, and a
        false value forbids it.  Raises LargeZipFile when the sizes exceed
        ZIP64_LIMIT but zip64 is false.
        """
        dt = self.date_time
        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
        if self.flag_bits & 0x08:
            # Set these to zero because we write them after the file data
            CRC = compress_size = file_size = 0
        else:
            CRC = self.CRC
            compress_size = self.compress_size
            file_size = self.file_size

        extra = self.extra

        if zip64 is None:
            zip64 = file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT
        if zip64:
            fmt = '<HHQQ'
            extra = extra + struct.pack(fmt,
                    1, struct.calcsize(fmt)-4, file_size, compress_size)
        if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
            if not zip64:
                raise LargeZipFile("Filesize would require ZIP64 extensions")
            # File is larger than what fits into a 4 byte integer,
            # fall back to the ZIP64 extension
            file_size = 0xffffffff
            compress_size = 0xffffffff
            # Raise each version independently; the creator version must not
            # be overwritten with the extract version.
            self.extract_version = max(45, self.extract_version)
            self.create_version = max(45, self.create_version)

        filename, flag_bits = self._encodeFilenameFlags()
        header = struct.pack(structFileHeader, stringFileHeader,
                 self.extract_version, self.reserved, flag_bits,
                 self.compress_type, dostime, dosdate, CRC,
                 compress_size, file_size,
                 len(filename), len(extra))
        return header + filename + extra

    def _encodeFilenameFlags(self):
        """Return (encoded filename, flag bits) for writing a header.

        Unicode names are stored as ASCII when possible; otherwise they are
        stored as UTF-8 and flag bit 0x800 is set.
        """
        if isinstance(self.filename, unicode):
            try:
                return self.filename.encode('ascii'), self.flag_bits
            except UnicodeEncodeError:
                return self.filename.encode('utf-8'), self.flag_bits | 0x800
        else:
            return self.filename, self.flag_bits

    def _decodeFilename(self):
        """Return the filename, decoded from UTF-8 when flag bit 0x800 is set."""
        if self.flag_bits & 0x800:
            return self.filename.decode('utf-8')
        else:
            return self.filename

    def _decodeExtra(self):
        """Apply the ZIP64 record (type 1) found in self.extra, if any.

        file_size, compress_size and header_offset are replaced by the
        64-bit values from the record when they hold the 0xffffffff
        placeholder.  Raises RuntimeError for a ZIP64 record whose length
        is not one of the expected sizes.
        """
        # Try to decode the extra field.
        extra = self.extra
        unpack = struct.unpack
        # Every record starts with a 4-byte (type, length) header; stop when
        # fewer bytes remain instead of failing on trailing garbage.
        while len(extra) >= 4:
            tp, ln = unpack('<HH', extra[:4])
            if tp == 1:
                if ln >= 24:
                    counts = unpack('<QQQ', extra[4:28])
                elif ln == 16:
                    counts = unpack('<QQ', extra[4:20])
                elif ln == 8:
                    counts = unpack('<Q', extra[4:12])
                elif ln == 0:
                    counts = ()
                else:
                    raise RuntimeError("Corrupt extra field %s" % (ln,))

                idx = 0

                # ZIP64 extension (large files and/or large archives)
                if self.file_size in (0xffffffffffffffff, 0xffffffff):
                    self.file_size = counts[idx]
                    idx += 1

                if self.compress_size == 0xFFFFFFFF:
                    self.compress_size = counts[idx]
                    idx += 1

                if self.header_offset == 0xffffffff:
                    self.header_offset = counts[idx]
                    idx += 1

            extra = extra[ln+4:]
Tim Petersa608bb22006-06-15 18:06:29 +0000408
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000409
class _ZipDecrypter:
    """Class to handle decryption of files stored within a ZIP archive.

    ZIP supports a password-based form of encryption. Even though known
    plaintext attacks have been found against it, it is still useful
    to be able to get data out of such a file.

    Usage:
        zd = _ZipDecrypter(mypwd)
        plain_char = zd(cypher_char)
        plain_text = map(zd, cypher_text)
    """

    def _GenerateCRCTable():
        """Generate a CRC-32 table.

        ZIP encryption uses the CRC32 one-byte primitive for scrambling some
        internal keys. We noticed that a direct implementation is faster than
        relying on binascii.crc32().
        """
        poly = 0xedb88320
        table = []
        for byte in range(256):
            crc = byte
            for _ in range(8):
                # Shift right one bit; fold in the polynomial when the bit
                # shifted out was set.
                low_bit = crc & 1
                crc = (crc >> 1) & 0x7FFFFFFF
                if low_bit:
                    crc ^= poly
            table.append(crc)
        return table
    crctable = _GenerateCRCTable()

    def _crc32(self, ch, crc):
        """Compute the CRC32 primitive on one byte."""
        index = (crc ^ ord(ch)) & 0xff
        return ((crc >> 8) & 0xffffff) ^ self.crctable[index]

    def __init__(self, pwd):
        """Initialise the three keys and mix in every character of pwd."""
        self.key0 = 0x12345678
        self.key1 = 0x23456789
        self.key2 = 0x34567890
        for ch in pwd:
            self._UpdateKeys(ch)

    def _UpdateKeys(self, c):
        """Advance the key state with the (plaintext) character c."""
        mask32 = 0xFFFFFFFF
        self.key0 = self._crc32(c, self.key0)
        key1 = (self.key1 + (self.key0 & 0xFF)) & mask32
        self.key1 = (key1 * 0x08088405 + 1) & mask32
        self.key2 = self._crc32(chr((self.key1 >> 24) & 0xFF), self.key2)

    def __call__(self, c):
        """Decrypt a single character."""
        k = self.key2 | 2
        keystream = ((k * (k ^ 1)) >> 8) & 0xFF
        plain = chr(ord(c) ^ keystream)
        self._UpdateKeys(plain)
        return plain
468
Ezio Melotti9e949722012-11-18 13:18:06 +0200469
# Names of the ZIP compression method ids, used by ZipExtFile to describe a
# compression type it cannot read.
compressor_names = {
    0: 'store', 1: 'shrink',
    2: 'reduce', 3: 'reduce', 4: 'reduce', 5: 'reduce',
    6: 'implode', 7: 'tokenize',
    8: 'deflate', 9: 'deflate64',
    10: 'implode',
    12: 'bzip2',
    14: 'lzma',
    18: 'terse', 19: 'lz77',
    97: 'wavpack', 98: 'ppmd',
}
489
490
class ZipExtFile(io.BufferedIOBase):
    """File-like object for reading an archive member.
       Is returned by ZipFile.open().
    """

    # Max size supported by decompressor.  Parenthesised on purpose: "-"
    # binds tighter than "<<", so "1 << 31 - 1" would be 1 << 30.
    MAX_N = (1 << 31) - 1

    # Read from compressed files in 4k blocks.
    MIN_READ_SIZE = 4096

    # Search for universal newlines or line chunks.
    PATTERN = re.compile(r'^(?P<chunk>[^\r\n]+)|(?P<newline>\n|\r\n?)')

    def __init__(self, fileobj, mode, zipinfo, decrypter=None,
            close_fileobj=False):
        """Wrap fileobj, which must be positioned at the member's data.

        mode is kept as the file mode; a 'U' in it enables universal
        newline handling in readline().  zipinfo supplies the compression
        type, compressed size, name and (when present) expected CRC.
        decrypter, if given, is called on every byte read.  When
        close_fileobj is true, close() also closes fileobj.

        Raises NotImplementedError for compression types other than
        ZIP_STORED and ZIP_DEFLATED.
        """
        self._fileobj = fileobj
        self._decrypter = decrypter
        self._close_fileobj = close_fileobj

        self._compress_type = zipinfo.compress_type
        self._compress_size = zipinfo.compress_size
        self._compress_left = zipinfo.compress_size

        if self._compress_type == ZIP_DEFLATED:
            self._decompressor = zlib.decompressobj(-15)
        elif self._compress_type != ZIP_STORED:
            descr = compressor_names.get(self._compress_type)
            if descr:
                raise NotImplementedError("compression type %d (%s)" % (self._compress_type, descr))
            else:
                raise NotImplementedError("compression type %d" % (self._compress_type,))
        self._unconsumed = ''

        self._readbuffer = ''
        self._offset = 0

        self._universal = 'U' in mode
        self.newlines = None

        # Adjust read size for encrypted files since the first 12 bytes
        # are for the encryption/password information.
        if self._decrypter is not None:
            self._compress_left -= 12

        self.mode = mode
        self.name = zipinfo.filename

        if hasattr(zipinfo, 'CRC'):
            self._expected_crc = zipinfo.CRC
            self._running_crc = crc32(b'') & 0xffffffff
        else:
            self._expected_crc = None

    def readline(self, limit=-1):
        """Read and return a line from the stream.

        If limit is specified, at most limit bytes will be read.
        """

        if not self._universal and limit < 0:
            # Shortcut common case - newline found in buffer.
            i = self._readbuffer.find('\n', self._offset) + 1
            if i > 0:
                line = self._readbuffer[self._offset: i]
                self._offset = i
                return line

        if not self._universal:
            return io.BufferedIOBase.readline(self, limit)

        line = ''
        while limit < 0 or len(line) < limit:
            readahead = self.peek(2)
            if readahead == '':
                return line

            #
            # Search for universal newlines or line chunks.
            #
            # The pattern returns either a line chunk or a newline, but not
            # both. Combined with peek(2), we are assured that the sequence
            # '\r\n' is always retrieved completely and never split into
            # separate newlines - '\r', '\n' due to coincidental readaheads.
            #
            match = self.PATTERN.search(readahead)
            newline = match.group('newline')
            if newline is not None:
                if self.newlines is None:
                    self.newlines = []
                if newline not in self.newlines:
                    self.newlines.append(newline)
                self._offset += len(newline)
                return line + '\n'

            chunk = match.group('chunk')
            if limit >= 0:
                chunk = chunk[: limit - len(line)]

            self._offset += len(chunk)
            line += chunk

        return line

    def peek(self, n=1):
        """Returns buffered bytes without advancing the position."""
        if n > len(self._readbuffer) - self._offset:
            # Not enough buffered: read() refills the buffer, then the
            # position is moved back over what it consumed.
            chunk = self.read(n)
            self._offset -= len(chunk)

        # Return up to 512 bytes to reduce allocation overhead for tight loops.
        return self._readbuffer[self._offset: self._offset + 512]

    def readable(self):
        """Return True: the object is always opened for reading."""
        return True

    def read(self, n=-1):
        """Read and return up to n bytes.
        If the argument is omitted, None, or negative, data is read and returned until EOF is reached.
        """
        buf = ''
        if n is None:
            n = -1
        while True:
            if n < 0:
                data = self.read1(n)
            elif n > len(buf):
                data = self.read1(n - len(buf))
            else:
                return buf
            if len(data) == 0:
                return buf
            buf += data

    def _update_crc(self, newdata, eof):
        """Fold newdata into the running CRC and verify it at end of file.

        Raises BadZipfile when eof is true and the CRC does not match the
        expected value.
        """
        # Update the CRC using the given data.
        if self._expected_crc is None:
            # No need to compute the CRC if we don't have a reference value
            return
        self._running_crc = crc32(newdata, self._running_crc) & 0xffffffff
        # Check the CRC if we're at the end of the file
        if eof and self._running_crc != self._expected_crc:
            raise BadZipfile("Bad CRC-32 for file %r" % self.name)

    def read1(self, n):
        """Read up to n bytes with at most one read() system call."""

        # Simplify algorithm (branching) by transforming negative n to large n.
        # None is tested first so it is never compared with an integer.
        if n is None or n < 0:
            n = self.MAX_N

        # Bytes available in read buffer.
        len_readbuffer = len(self._readbuffer) - self._offset

        # Read from file.
        if self._compress_left > 0 and n > len_readbuffer + len(self._unconsumed):
            nbytes = n - len_readbuffer - len(self._unconsumed)
            nbytes = max(nbytes, self.MIN_READ_SIZE)
            nbytes = min(nbytes, self._compress_left)

            data = self._fileobj.read(nbytes)
            self._compress_left -= len(data)

            if data and self._decrypter is not None:
                data = ''.join(map(self._decrypter, data))

            if self._compress_type == ZIP_STORED:
                self._update_crc(data, eof=(self._compress_left==0))
                self._readbuffer = self._readbuffer[self._offset:] + data
                self._offset = 0
            else:
                # Prepare deflated bytes for decompression.
                self._unconsumed += data

        # Handle unconsumed data.
        if (len(self._unconsumed) > 0 and n > len_readbuffer and
            self._compress_type == ZIP_DEFLATED):
            data = self._decompressor.decompress(
                self._unconsumed,
                max(n - len_readbuffer, self.MIN_READ_SIZE)
            )

            self._unconsumed = self._decompressor.unconsumed_tail
            eof = len(self._unconsumed) == 0 and self._compress_left == 0
            if eof:
                data += self._decompressor.flush()

            self._update_crc(data, eof=eof)
            self._readbuffer = self._readbuffer[self._offset:] + data
            self._offset = 0

        # Read from buffer.
        data = self._readbuffer[self._offset: self._offset + n]
        self._offset += len(data)
        return data

    def close(self):
        """Close the stream, and the underlying file object if we own it."""
        try :
            if self._close_fileobj:
                self._fileobj.close()
        finally:
            super(ZipExtFile, self).close()
Tim Petersea5962f2007-03-12 18:07:52 +0000693
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000694
R David Murray3f4ccba2012-04-12 18:42:47 -0400695class ZipFile(object):
Tim Petersa19a1682001-03-29 04:36:09 +0000696 """ Class with methods to open, read, write, close, list zip files.
697
Martin v. Löwis8c436412008-07-03 12:51:14 +0000698 z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=False)
Tim Petersa19a1682001-03-29 04:36:09 +0000699
Fred Drake3d9091e2001-03-26 15:49:24 +0000700 file: Either the path to the file, or a file-like object.
701 If it is a path, the file will be opened and closed by ZipFile.
702 mode: The mode can be either read "r", write "w" or append "a".
703 compression: ZIP_STORED (no compression) or ZIP_DEFLATED (requires zlib).
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000704 allowZip64: if True ZipFile will create files with ZIP64 extensions when
705 needed, otherwise it will raise an exception when this would
706 be necessary.
707
Fred Drake3d9091e2001-03-26 15:49:24 +0000708 """
Fred Drake484d7352000-10-02 21:14:52 +0000709
Fred Drake90eac282001-02-28 05:29:34 +0000710 fp = None # Set here since __del__ checks it
711
def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=False):
    """Open the ZIP file with mode read "r", write "w" or append "a".

    'file' is either a path, in which case the file is opened here, or a
    file-like object, which is used as given.  RuntimeError is raised for
    an invalid mode, for an unsupported compression method, or when
    ZIP_DEFLATED is requested while zlib is unavailable.  If setting up
    the archive fails, a file opened here is closed again before the
    exception propagates.
    """
    if mode not in ("r", "w", "a"):
        raise RuntimeError('ZipFile() requires mode "r", "w", or "a"')

    if compression not in (ZIP_STORED, ZIP_DEFLATED):
        raise RuntimeError("That compression method is not supported")
    if compression == ZIP_DEFLATED and not zlib:
        raise RuntimeError("Compression requires the (missing) zlib module")

    self._allowZip64 = allowZip64
    self._didModify = False
    self.debug = 0                  # Level of printing: 0 through 3
    self.NameToInfo = {}            # Find file info given name
    self.filelist = []              # List of ZipInfo instances for archive
    self.compression = compression  # Method of compression
    self.mode = key = mode.replace('b', '')[0]
    self.pwd = None
    self._comment = ''

    if isinstance(file, basestring):
        # We were given a path: open the file ourselves.
        self._filePassed = 0
        self.filename = file
        modeDict = {'r': 'rb', 'w': 'wb', 'a': 'r+b'}
        try:
            self.fp = open(file, modeDict[mode])
        except IOError:
            if mode != 'a':
                raise
            # The file could not be opened for update; fall back to
            # creating it as if mode "w" had been requested.
            mode = key = 'w'
            self.fp = open(file, modeDict[mode])
    else:
        # We were given a file-like object: use it as is.
        self._filePassed = 1
        self.fp = file
        self.filename = getattr(file, 'name', None)

    try:
        if key == 'r':
            self._RealGetContents()
        elif key == 'w':
            # set the modified flag so central directory gets written
            # even if no files are added to the archive
            self._didModify = True
        elif key == 'a':
            try:
                # See if file is a zip file
                self._RealGetContents()
                # seek to start of directory and overwrite
                self.fp.seek(self.start_dir, 0)
            except BadZipfile:
                # file is not a zip file, just append
                self.fp.seek(0, 2)

                # set the modified flag so central directory gets written
                # even if no files are added to the archive
                self._didModify = True
        else:
            raise RuntimeError('Mode must be "r", "w" or "a"')
    except:
        # Detach the file object first so a later close()/__del__ is a
        # no-op, then close it only if it was opened here.
        fp = self.fp
        self.fp = None
        if not self._filePassed:
            fp.close()
        raise
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000782
def __enter__(self):
    """Enter a ``with`` block; the archive object itself is the target."""
    return self
785
def __exit__(self, type, value, traceback):
    """Leave a ``with`` block by closing the archive.

    Returns None, so an exception raised inside the block is not
    suppressed.
    """
    self.close()
788
def _RealGetContents(self):
    """Read in the table of contents for the ZIP file.

    Locates the end-of-archive record, then parses every central
    directory entry into a ZipInfo that is appended to self.filelist and
    indexed in self.NameToInfo.  Also sets self.start_dir and
    self._comment.

    Raises BadZipfile when no end-of-archive record is found, or when a
    central directory entry has a bad signature or is truncated.
    """
    fp = self.fp
    try:
        endrec = _EndRecData(fp)
    except IOError:
        raise BadZipfile("File is not a zip file")
    if not endrec:
        raise BadZipfile("File is not a zip file")
    if self.debug > 1:
        print(endrec)
    size_cd = endrec[_ECD_SIZE]             # bytes in central directory
    offset_cd = endrec[_ECD_OFFSET]         # offset of central directory
    self._comment = endrec[_ECD_COMMENT]    # archive comment

    # "concat" is zero, unless zip was concatenated to another file
    concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
    if endrec[_ECD_SIGNATURE] == stringEndArchive64:
        # If Zip64 extension structures are present, account for them
        concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)

    if self.debug > 2:
        inferred = concat + offset_cd
        print("given, inferred, offset %s %s %s"
              % (offset_cd, inferred, concat))
    # self.start_dir:  Position of start of central directory
    self.start_dir = offset_cd + concat
    fp.seek(self.start_dir, 0)
    data = fp.read(size_cd)
    # Parse the directory from memory rather than from the archive file.
    fp = cStringIO.StringIO(data)
    total = 0
    while total < size_cd:
        centdir = fp.read(sizeCentralDir)
        if centdir[0:4] != stringCentralDir:
            raise BadZipfile("Bad magic number for central directory")
        if len(centdir) != sizeCentralDir:
            # A short read with a valid signature would otherwise escape
            # as struct.error from the unpack below.
            raise BadZipfile("Truncated central directory")
        centdir = struct.unpack(structCentralDir, centdir)
        if self.debug > 2:
            print(centdir)
        filename = fp.read(centdir[_CD_FILENAME_LENGTH])
        # Create ZipInfo instance to store file information
        x = ZipInfo(filename)
        x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
        x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
        x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
        (x.create_version, x.create_system, x.extract_version, x.reserved,
         x.flag_bits, x.compress_type, t, d,
         x.CRC, x.compress_size, x.file_size) = centdir[1:12]
        x.volume, x.internal_attr, x.external_attr = centdir[15:18]
        # Convert date/time code to (year, month, day, hour, min, sec)
        x._raw_time = t
        x.date_time = ((d >> 9) + 1980, (d >> 5) & 0xF, d & 0x1F,
                       t >> 11, (t >> 5) & 0x3F, (t & 0x1F) * 2)

        x._decodeExtra()
        x.header_offset = x.header_offset + concat
        x.filename = x._decodeFilename()
        self.filelist.append(x)
        self.NameToInfo[x.filename] = x

        # update total bytes read from central directory
        total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH]
                 + centdir[_CD_EXTRA_FIELD_LENGTH]
                 + centdir[_CD_COMMENT_LENGTH])

        if self.debug > 2:
            print("total %s" % (total,))
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000854
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000855
def namelist(self):
    """Return a list of file names in the archive."""
    return [info.filename for info in self.filelist]
862
def infolist(self):
    """Return the list of ZipInfo instances for the files in the archive.

    The archive's own list object is returned, not a copy.
    """
    return self.filelist
867
def printdir(self):
    """Print a table of contents for the zip file."""
    print("%-46s %19s %12s" % ("File Name", "Modified ", "Size"))
    for entry in self.filelist:
        stamp = "%d-%02d-%02d %02d:%02d:%02d" % entry.date_time[:6]
        print("%-46s %s %12d" % (entry.filename, stamp, entry.file_size))
874
def testzip(self):
    """Read all the files and check the CRC.

    Returns the name of the first member whose read raises BadZipfile,
    or None when every member reads cleanly.
    """
    # Read by chunks, to avoid an OverflowError or a
    # MemoryError with very large embedded files.
    chunk_size = 2 ** 20
    for zinfo in self.filelist:
        try:
            with self.open(zinfo.filename, "r") as member:
                while member.read(chunk_size):     # Check CRC-32
                    pass
        except BadZipfile:
            return zinfo.filename
    return None
887
def getinfo(self, name):
    """Return the instance of ZipInfo given 'name'.

    Raises KeyError when the archive has no member called 'name'.
    """
    info = self.NameToInfo.get(name)
    if info is not None:
        return info
    raise KeyError('There is no item named %r in the archive' % name)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000896
def setpassword(self, pwd):
    """Set default password for encrypted files.

    The value is stored in self.pwd, which open() falls back to when it
    is called without a password.
    """
    self.pwd = pwd
900
@property
def comment(self):
    """The comment text associated with the ZIP file."""
    return self._comment

@comment.setter
def comment(self, comment):
    # Store a new archive comment and mark the archive as modified.
    # A comment of exactly ZIP_MAX_COMMENT bytes is left untouched by the
    # slice below, so only strictly longer comments are truncated and
    # reported.
    if len(comment) > ZIP_MAX_COMMENT:
        if self.debug:
            print('Archive comment is too long; truncating to %d bytes'
                  % ZIP_MAX_COMMENT)
        comment = comment[:ZIP_MAX_COMMENT]
    self._comment = comment
    self._didModify = True
916
def read(self, name, pwd=None):
    """Return file bytes (as a string) for name.

    'name' and 'pwd' are passed on to open().  The member stream is
    closed before returning, so a file handle opened by open() is not
    left for the garbage collector.
    """
    with self.open(name, "r", pwd) as member:
        return member.read()
920
def open(self, name, mode="r", pwd=None):
    """Return file-like object for 'name'.

    'name' is a member name or a ZipInfo instance.  'mode' must be "r",
    "U" or "rU".  'pwd' is the password for an encrypted member; when it
    is not given, the archive's default password is used.

    Raises RuntimeError for an invalid mode, a closed archive, a missing
    password or a wrong password, and BadZipfile when the member's local
    file header is malformed, truncated, or names a different file than
    the central directory.
    """
    if mode not in ("r", "U", "rU"):
        raise RuntimeError('open() requires mode "r", "U", or "rU"')
    if not self.fp:
        raise RuntimeError(
            "Attempt to read ZIP archive that was already closed")

    # Only open a new file for instances where we were not
    # given a file object in the constructor
    if self._filePassed:
        zef_file = self.fp
        should_close = False
    else:
        zef_file = open(self.filename, 'rb')
        should_close = True

    try:
        # Make sure we have an info object
        if isinstance(name, ZipInfo):
            # 'name' is already an info object
            zinfo = name
        else:
            # Get info object for name
            zinfo = self.getinfo(name)

        zef_file.seek(zinfo.header_offset, 0)

        # Skip the file header:
        fheader = zef_file.read(sizeFileHeader)
        if fheader[0:4] != stringFileHeader:
            raise BadZipfile("Bad magic number for file header")
        if len(fheader) != sizeFileHeader:
            # A short read with a valid signature would otherwise escape
            # as struct.error from the unpack below.
            raise BadZipfile("Truncated file header")

        fheader = struct.unpack(structFileHeader, fheader)
        fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
        if fheader[_FH_EXTRA_FIELD_LENGTH]:
            zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])

        if fname != zinfo.orig_filename:
            raise BadZipfile(
                'File name in directory "%s" and header "%s" differ.' % (
                    zinfo.orig_filename, fname))

        # check for encrypted flag & handle password
        is_encrypted = zinfo.flag_bits & 0x1
        zd = None
        if is_encrypted:
            if not pwd:
                pwd = self.pwd
            if not pwd:
                raise RuntimeError("File %s is encrypted, "
                                   "password required for extraction" % name)

            zd = _ZipDecrypter(pwd)
            # The first 12 bytes in the cypher stream is an encryption header
            #  used to strengthen the algorithm. The first 11 bytes are
            #  completely random, while the 12th contains the MSB of the CRC,
            #  or the MSB of the file time depending on the header type
            #  and is used to check the correctness of the password.
            crypt_header = zef_file.read(12)
            h = [zd(c) for c in crypt_header[0:12]]
            if zinfo.flag_bits & 0x8:
                # compare against the file type from extended local headers
                check_byte = (zinfo._raw_time >> 8) & 0xff
            else:
                # compare against the CRC otherwise
                check_byte = (zinfo.CRC >> 24) & 0xff
            if ord(h[11]) != check_byte:
                raise RuntimeError("Bad password for file", name)

        return ZipExtFile(zef_file, mode, zinfo, zd,
                          close_fileobj=should_close)
    except:
        # Do not leak a file handle that was opened above.
        if should_close:
            zef_file.close()
        raise
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000997
def extract(self, member, path=None, pwd=None):
    """Extract a member from the archive to the current working directory,
    using its full name.  'member' may be a filename or a ZipInfo object.
    A different directory can be given with 'path'.  Returns whatever
    _extract_member() returns for the member.
    """
    info = member if isinstance(member, ZipInfo) else self.getinfo(member)
    destination = os.getcwd() if path is None else path
    return self._extract_member(info, destination, pwd)
1011
def extractall(self, path=None, members=None, pwd=None):
    """Extract all members from the archive to the current working
    directory.  'path' specifies a different directory to extract to.
    'members' is optional and must be a subset of the list returned
    by namelist().
    """
    selected = self.namelist() if members is None else members
    for entry in selected:
        self.extract(entry, path, pwd)
1023
def _extract_member(self, member, targetpath, pwd):
    """Extract the ZipInfo object 'member' to a physical
    file on the path targetpath.

    The member name is treated as untrusted input: it is always
    interpreted relative to 'targetpath'.  A drive prefix, leading
    separators, empty components and "." / ".." components are dropped,
    so a crafted name cannot place the file outside 'targetpath'.

    Returns the path of the created file or directory.
    """
    # Strip trailing path separator, unless it represents the root.
    if (targetpath[-1:] in (os.path.sep, os.path.altsep)
            and len(os.path.splitdrive(targetpath)[1]) > 1):
        targetpath = targetpath[:-1]

    # Build the destination pathname, replacing forward slashes with the
    # platform specific separator.
    arcname = member.filename.replace('/', os.path.sep)
    if os.path.altsep:
        arcname = arcname.replace(os.path.altsep, os.path.sep)
    # Interpret an absolute pathname as relative: remove any drive prefix,
    # redundant separators, "." and ".." components.
    arcname = os.path.splitdrive(arcname)[1]
    arcname = os.path.sep.join(
        part for part in arcname.split(os.path.sep)
        if part not in ('', os.path.curdir, os.path.pardir))

    targetpath = os.path.normpath(os.path.join(targetpath, arcname))

    # Create all upper directories if necessary.
    upperdirs = os.path.dirname(targetpath)
    if upperdirs and not os.path.exists(upperdirs):
        os.makedirs(upperdirs)

    # A name ending in "/" denotes a directory entry; endswith() is also
    # safe for an empty name.
    if member.filename.endswith('/'):
        if not os.path.isdir(targetpath):
            os.mkdir(targetpath)
        return targetpath

    with self.open(member, pwd=pwd) as source, \
            open(targetpath, "wb") as target:
        shutil.copyfileobj(source, target)

    return targetpath
1058
def _writecheck(self, zinfo):
    """Check for errors before writing a file to the archive.

    Raises RuntimeError when the archive is not writable, is closed, or
    the member's compression method cannot be used, and LargeZipFile
    when the member would need ZIP64 extensions that are not allowed.
    A duplicate member name is only reported when self.debug is set.
    """
    if zinfo.filename in self.NameToInfo and self.debug:
        # Warning for duplicate names
        print("Duplicate name: %s" % (zinfo.filename,))
    if self.mode not in ("w", "a"):
        raise RuntimeError('write() requires mode "w" or "a"')
    if not self.fp:
        raise RuntimeError(
            "Attempt to write ZIP archive that was already closed")
    if zinfo.compress_type == ZIP_DEFLATED and not zlib:
        raise RuntimeError(
            "Compression requires the (missing) zlib module")
    if zinfo.compress_type not in (ZIP_STORED, ZIP_DEFLATED):
        raise RuntimeError("That compression method is not supported")
    if zinfo.file_size > ZIP64_LIMIT and not self._allowZip64:
        raise LargeZipFile("Filesize would require ZIP64 extensions")
    if zinfo.header_offset > ZIP64_LIMIT and not self._allowZip64:
        raise LargeZipFile("Zipfile size would require ZIP64 extensions")
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001081
def write(self, filename, arcname=None, compress_type=None):
    """Put the bytes from filename into the archive under the name
    arcname.

    When 'arcname' is None the file's own name is used.  Any drive prefix
    and leading separators are removed from the archive name, and a
    directory gets a trailing "/".  'compress_type' overrides the
    archive's default compression for this member.
    """
    if not self.fp:
        raise RuntimeError(
            "Attempt to write to ZIP archive that was already closed")

    st = os.stat(filename)
    isdir = stat.S_ISDIR(st.st_mode)
    date_time = time.localtime(st.st_mtime)[0:6]
    # Create ZipInfo instance to store file information
    if arcname is None:
        arcname = filename
    arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
    while arcname[0] in (os.sep, os.altsep):
        arcname = arcname[1:]
    if isdir:
        arcname += '/'
    zinfo = ZipInfo(arcname, date_time)
    zinfo.external_attr = (st[0] & 0xFFFF) << 16      # Unix attributes
    zinfo.compress_type = (self.compression if compress_type is None
                           else compress_type)

    zinfo.file_size = st.st_size
    zinfo.flag_bits = 0x00
    zinfo.header_offset = self.fp.tell()    # Start of header bytes

    self._writecheck(zinfo)
    self._didModify = True

    if isdir:
        # A directory entry has a header only, no data.
        zinfo.file_size = 0
        zinfo.compress_size = 0
        zinfo.CRC = 0
        self.filelist.append(zinfo)
        self.NameToInfo[zinfo.filename] = zinfo
        self.fp.write(zinfo.FileHeader(False))
        return

    with open(filename, "rb") as src:
        # Must overwrite CRC and sizes with correct data later
        zinfo.CRC = CRC = 0
        zinfo.compress_size = compress_size = 0
        # Compressed size can be larger than uncompressed size
        zip64 = self._allowZip64 and zinfo.file_size * 1.05 > ZIP64_LIMIT
        self.fp.write(zinfo.FileHeader(zip64))
        if zinfo.compress_type == ZIP_DEFLATED:
            cmpr = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                                    zlib.DEFLATED, -15)
        else:
            cmpr = None
        file_size = 0
        while True:
            buf = src.read(1024 * 8)
            if not buf:
                break
            file_size += len(buf)
            CRC = crc32(buf, CRC) & 0xffffffff
            if cmpr:
                buf = cmpr.compress(buf)
                compress_size += len(buf)
            self.fp.write(buf)
    if cmpr:
        buf = cmpr.flush()
        compress_size += len(buf)
        self.fp.write(buf)
        zinfo.compress_size = compress_size
    else:
        zinfo.compress_size = file_size
    zinfo.CRC = CRC
    zinfo.file_size = file_size
    if not zip64 and self._allowZip64:
        # The header was written without ZIP64 fields, so the final
        # sizes must still fit.
        if file_size > ZIP64_LIMIT:
            raise RuntimeError('File size has increased during compressing')
        if compress_size > ZIP64_LIMIT:
            raise RuntimeError('Compressed size larger than uncompressed size')
    # Seek backwards and write file header (which will now include
    # correct CRC and file sizes)
    position = self.fp.tell()       # Preserve current position in file
    self.fp.seek(zinfo.header_offset, 0)
    self.fp.write(zinfo.FileHeader(zip64))
    self.fp.seek(position, 0)
    self.filelist.append(zinfo)
    self.NameToInfo[zinfo.filename] = zinfo
1170
def writestr(self, zinfo_or_arcname, bytes, compress_type=None):
    """Write a file into the archive.  The contents is the string
    'bytes'.  'zinfo_or_arcname' is either a ZipInfo instance or
    the name of the file in the archive.

    When a name is given, a ZipInfo is created with the current local
    time and the archive's default compression.  'compress_type', when
    not None, overrides the compression of the ZipInfo that is used.
    """
    if isinstance(zinfo_or_arcname, ZipInfo):
        zinfo = zinfo_or_arcname
    else:
        zinfo = ZipInfo(filename=zinfo_or_arcname,
                        date_time=time.localtime(time.time())[:6])

        zinfo.compress_type = self.compression
        zinfo.external_attr = 0o600 << 16

    if not self.fp:
        raise RuntimeError(
            "Attempt to write to ZIP archive that was already closed")

    if compress_type is not None:
        zinfo.compress_type = compress_type

    payload = bytes
    zinfo.file_size = len(payload)              # Uncompressed size
    zinfo.header_offset = self.fp.tell()        # Start of header bytes
    self._writecheck(zinfo)
    self._didModify = True
    zinfo.CRC = crc32(payload) & 0xffffffff     # CRC-32 checksum
    if zinfo.compress_type == ZIP_DEFLATED:
        compressor = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                                      zlib.DEFLATED, -15)
        payload = compressor.compress(payload) + compressor.flush()
        zinfo.compress_size = len(payload)      # Compressed size
    else:
        zinfo.compress_size = zinfo.file_size
    zip64 = (zinfo.file_size > ZIP64_LIMIT or
             zinfo.compress_size > ZIP64_LIMIT)
    if zip64 and not self._allowZip64:
        raise LargeZipFile("Filesize would require ZIP64 extensions")
    self.fp.write(zinfo.FileHeader(zip64))
    self.fp.write(payload)
    if zinfo.flag_bits & 0x08:
        # Write CRC and file sizes after the file data
        fmt = '<LQQ' if zip64 else '<LLL'
        self.fp.write(struct.pack(fmt, zinfo.CRC, zinfo.compress_size,
                                  zinfo.file_size))
    self.fp.flush()
    self.filelist.append(zinfo)
    self.NameToInfo[zinfo.filename] = zinfo
1217
def __del__(self):
    """Call the "close()" method in case the user forgot."""
    self.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001221
def close(self):
    """Close the file, and for mode "w" and "a" write the ending
    records.

    Does nothing when the archive is already closed.  The ending records
    are written only if the archive was modified.  The file object is
    always detached from this instance, and it is closed only when it
    was opened by this instance rather than passed in.
    """
    if self.fp is None:
        return

    try:
        if self.mode in ("w", "a") and self._didModify:  # write ending records
            count = 0
            pos1 = self.fp.tell()
            for zinfo in self.filelist:         # write central directory
                count += 1
                dt = zinfo.date_time
                dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
                dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)

                # Values that do not fit the 32-bit fields are collected
                # in 'extra' and replaced by 0xffffffff in the record.
                extra = []
                if (zinfo.file_size > ZIP64_LIMIT
                        or zinfo.compress_size > ZIP64_LIMIT):
                    extra.append(zinfo.file_size)
                    extra.append(zinfo.compress_size)
                    file_size = compress_size = 0xffffffff
                else:
                    file_size = zinfo.file_size
                    compress_size = zinfo.compress_size

                if zinfo.header_offset > ZIP64_LIMIT:
                    extra.append(zinfo.header_offset)
                    header_offset = 0xffffffff
                else:
                    header_offset = zinfo.header_offset

                extra_data = zinfo.extra
                if extra:
                    # Append a ZIP64 field to the extra's
                    zip64_field = struct.pack(
                        '<HH' + 'Q' * len(extra),
                        1, 8 * len(extra), *extra)
                    extra_data = zip64_field + extra_data

                    extract_version = max(45, zinfo.extract_version)
                    create_version = max(45, zinfo.create_version)
                else:
                    extract_version = zinfo.extract_version
                    create_version = zinfo.create_version

                try:
                    filename, flag_bits = zinfo._encodeFilenameFlags()
                    centdir = struct.pack(
                        structCentralDir,
                        stringCentralDir, create_version,
                        zinfo.create_system, extract_version, zinfo.reserved,
                        flag_bits, zinfo.compress_type, dostime, dosdate,
                        zinfo.CRC, compress_size, file_size,
                        len(filename), len(extra_data), len(zinfo.comment),
                        0, zinfo.internal_attr, zinfo.external_attr,
                        header_offset)
                except DeprecationWarning:
                    # Dump the values being packed before re-raising.
                    sys.stderr.write("%s\n" % ((
                        structCentralDir,
                        stringCentralDir, create_version,
                        zinfo.create_system, extract_version, zinfo.reserved,
                        zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
                        zinfo.CRC, compress_size, file_size,
                        len(zinfo.filename), len(extra_data),
                        len(zinfo.comment),
                        0, zinfo.internal_attr, zinfo.external_attr,
                        header_offset),))
                    raise
                self.fp.write(centdir)
                self.fp.write(filename)
                self.fp.write(extra_data)
                self.fp.write(zinfo.comment)

            pos2 = self.fp.tell()
            # Write end-of-zip-archive record
            centDirCount = count
            centDirSize = pos2 - pos1
            centDirOffset = pos1
            if (centDirCount >= ZIP_FILECOUNT_LIMIT or
                    centDirOffset > ZIP64_LIMIT or
                    centDirSize > ZIP64_LIMIT):
                # Need to write the ZIP64 end-of-archive records
                zip64endrec = struct.pack(
                    structEndArchive64, stringEndArchive64,
                    44, 45, 45, 0, 0, centDirCount, centDirCount,
                    centDirSize, centDirOffset)
                self.fp.write(zip64endrec)

                zip64locrec = struct.pack(
                    structEndArchive64Locator,
                    stringEndArchive64Locator, 0, pos2, 1)
                self.fp.write(zip64locrec)
                # Clamp the values stored in the classic end record.
                centDirCount = min(centDirCount, 0xFFFF)
                centDirSize = min(centDirSize, 0xFFFFFFFF)
                centDirOffset = min(centDirOffset, 0xFFFFFFFF)

            endrec = struct.pack(structEndArchive, stringEndArchive,
                                 0, 0, centDirCount, centDirCount,
                                 centDirSize, centDirOffset,
                                 len(self._comment))
            self.fp.write(endrec)
            self.fp.write(self._comment)
            self.fp.flush()
    finally:
        # Detach first so a repeated close() is a no-op.
        fp = self.fp
        self.fp = None
        if not self._filePassed:
            fp.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001326
1327
class PyZipFile(ZipFile):
    """Class to create ZIP archives with Python library files and packages."""

    def writepy(self, pathname, basename=""):
        """Add all files from "pathname" to the ZIP archive.

        If pathname is a package directory, search the directory and
        all package subdirectories recursively for all *.py and enter
        the modules into the archive.  If pathname is a plain
        directory, listdir *.py and enter all modules.  Else, pathname
        must be a Python *.py file and the module will be put into the
        archive.  Added modules are always module.pyo or module.pyc.
        This method will compile the module.py into module.pyc if
        necessary.

        "basename" is the archive directory under which the modules are
        stored; for a package directory the package name is appended to
        it.  Directory entries are processed in sorted order so that the
        resulting archive layout is deterministic.

        Raises RuntimeError if pathname is not a directory and does not
        end with ".py".
        """
        if not os.path.isdir(pathname):
            # A single module file.
            if pathname[-3:] != ".py":
                raise RuntimeError(
                    'Files added with writepy() must end with ".py"')
            self._write_module(pathname[0:-3], basename, "Adding file")
            return

        initname = os.path.join(pathname, "__init__.py")
        if not os.path.isfile(initname):
            # This is NOT a package directory, add its files at top level
            if self.debug:
                print("Adding files from directory %s" % (pathname,))
            for filename in sorted(os.listdir(pathname)):
                if os.path.splitext(filename)[1] == ".py":
                    path = os.path.join(pathname, filename)
                    self._write_module(path[0:-3], basename)
            return

        # This is a package directory, add it.  A trailing separator
        # ("pkg/") leaves the last path component empty, so fall back to
        # the parent component to recover the package name.
        name = os.path.basename(pathname)
        if not name:
            name = os.path.basename(os.path.dirname(pathname))
        if basename:
            basename = "%s/%s" % (basename, name)
        else:
            basename = name
        if self.debug:
            print("Adding package in %s as %s" % (pathname, basename))
        self._write_module(initname[0:-3], basename)

        dirlist = sorted(os.listdir(pathname))
        dirlist.remove("__init__.py")   # already written above
        # Add all *.py files and package subdirectories
        for filename in dirlist:
            path = os.path.join(pathname, filename)
            if os.path.isdir(path):
                if os.path.isfile(os.path.join(path, "__init__.py")):
                    # This is a package directory, add it
                    self.writepy(path, basename)  # Recursive call
            elif os.path.splitext(filename)[1] == ".py":
                self._write_module(path[0:-3], basename)

    def _write_module(self, module_path, basename, message="Adding"):
        """Store the compiled form of one module in the archive.

        "module_path" is the path of the module without its ".py"
        extension.  "message" is the prefix of the line printed when
        self.debug is set.
        """
        fname, arcname = self._get_codename(module_path, basename)
        if self.debug:
            print("%s %s" % (message, arcname))
        self.write(fname, arcname)

    def _get_codename(self, pathname, basename):
        """Return (filename, archivename) for the path.

        Given a module name path, return the correct file path and
        archive name, compiling if necessary.  For example, given
        /python/lib/string, return (/python/lib/string.pyc, string.pyc).
        If basename is non-empty the archive name is prefixed with
        "basename/".
        """
        file_py = pathname + ".py"
        file_pyc = pathname + ".pyc"
        file_pyo = pathname + ".pyo"
        if (os.path.isfile(file_pyo) and
                os.stat(file_pyo).st_mtime >= os.stat(file_py).st_mtime):
            fname = file_pyo    # Use .pyo file
        elif (not os.path.isfile(file_pyc) or
                os.stat(file_pyc).st_mtime < os.stat(file_py).st_mtime):
            # The .pyc file is missing or older than the source.
            import py_compile
            if self.debug:
                print("Compiling %s" % (file_py,))
            try:
                py_compile.compile(file_py, file_pyc, None, True)
            except py_compile.PyCompileError as err:
                # Best effort: report the problem and still return the
                # .pyc path, which may then be stale or missing.
                print(err.msg)
            fname = file_pyc
        else:
            fname = file_pyc
        archivename = os.path.split(fname)[1]
        if basename:
            archivename = "%s/%s" % (basename, archivename)
        return (fname, archivename)
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001425
1426
def main(args=None):
    """Run the command-line interface of this module.

    "args" is the list of command-line arguments (without the program
    name); when None, sys.argv[1:] is used.  The first argument selects
    the action:

        -l archive          list the archive contents
        -t archive          test the archive for corrupted members
        -e archive target   extract the archive into directory "target"
        -c archive src ...  create the archive from files/directories

    On a usage error the usage text is printed and the process exits
    with status 1 (SystemExit).
    """
    import textwrap
    USAGE = textwrap.dedent("""\
        Usage:
            zipfile.py -l zipfile.zip        # Show listing of a zipfile
            zipfile.py -t zipfile.zip        # Test if a zipfile is valid
            zipfile.py -e zipfile.zip target # Extract zipfile into target dir
            zipfile.py -c zipfile.zip src ... # Create zipfile from sources
        """)
    if args is None:
        args = sys.argv[1:]

    def usage_error():
        # Print the usage text and terminate with a failure status.
        print(USAGE)
        sys.exit(1)

    if not args or args[0] not in ('-l', '-c', '-e', '-t'):
        usage_error()
    command = args[0]

    if command == '-l':
        if len(args) != 2:
            usage_error()
        with ZipFile(args[1], 'r') as zf:
            zf.printdir()

    elif command == '-t':
        if len(args) != 2:
            usage_error()
        with ZipFile(args[1], 'r') as zf:
            badfile = zf.testzip()
        if badfile:
            print("The following enclosed file is corrupted: {!r}".format(badfile))
        print("Done testing")

    elif command == '-e':
        if len(args) != 3:
            usage_error()
        # Delegate to extractall() rather than joining member names by
        # hand: opening a directory entry (a name ending in "/") as a
        # file fails, and so does os.makedirs('') for a target without a
        # directory component.
        with ZipFile(args[1], 'r') as zf:
            zf.extractall(args[2])

    elif command == '-c':
        if len(args) < 3:
            usage_error()

        def addToZip(zf, path, zippath):
            # Recursively add "path" to the archive under the name
            # "zippath"; anything that is neither a file nor a directory
            # is ignored.
            if os.path.isfile(path):
                zf.write(path, zippath, ZIP_DEFLATED)
            elif os.path.isdir(path):
                # Sorted so the archive order does not depend on the
                # arbitrary order returned by os.listdir().
                for nm in sorted(os.listdir(path)):
                    addToZip(zf,
                             os.path.join(path, nm), os.path.join(zippath, nm))
            # else: ignore

        with ZipFile(args[1], 'w', allowZip64=True) as zf:
            for src in args[2:]:
                addToZip(zf, src, os.path.basename(src))


if __name__ == "__main__":
    main()