"""
Read and write ZIP files.
"""
Georg Brandl62416bc2008-01-07 18:47:44 +00004import struct, os, time, sys, shutil
Martin v. Löwis0dfcfc82009-01-24 14:00:33 +00005import binascii, cStringIO, stat
Antoine Pitrou94c33eb2010-01-27 20:59:50 +00006import io
7import re
Guido van Rossum32abe6f2000-03-31 17:30:02 +00008
9try:
Tim Peterse1190062001-01-15 03:34:38 +000010 import zlib # We may need its compression method
Gregory P. Smithb89a0962008-03-19 01:46:10 +000011 crc32 = zlib.crc32
Guido van Rossum9c673f32001-04-10 15:37:12 +000012except ImportError:
Guido van Rossum32abe6f2000-03-31 17:30:02 +000013 zlib = None
Gregory P. Smithb89a0962008-03-19 01:46:10 +000014 crc32 = binascii.crc32
Guido van Rossum32abe6f2000-03-31 17:30:02 +000015
# Names exported by "from zipfile import *".
__all__ = ["BadZipfile", "error", "ZIP_STORED", "ZIP_DEFLATED", "is_zipfile",
           "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile" ]
Skip Montanaro40fc1602001-03-01 04:27:19 +000018
class BadZipfile(Exception):
    """Error raised by this module for malformed or unsupported ZIP data."""
Ronald Oussoren143cefb2006-06-15 08:14:18 +000021
22
class LargeZipFile(Exception):
    """Raised while writing a zipfile that would need ZIP64 extensions
    when those extensions are disabled.
    """
28
error = BadZipfile      # Alias of BadZipfile: the exception raised by this module

# Largest size (2**31 - 1) written directly into a header; ZipInfo.FileHeader
# switches to the ZIP64 extra field for anything larger.
ZIP64_LIMIT = (1 << 31) - 1
# 65536 -- presumably the entry-count threshold for ZIP64; not referenced in
# this part of the file, confirm at the use site.
ZIP_FILECOUNT_LIMIT = 1 << 16
# 65535 -- presumably the longest archive comment (the comment size is a
# 16-bit field in structEndArchive); confirm at the use site.
ZIP_MAX_COMMENT = (1 << 16) - 1

# constants for Zip file compression methods
ZIP_STORED = 0
ZIP_DEFLATED = 8
# Other ZIP compression methods not supported
39
Martin v. Löwis8c436412008-07-03 12:51:14 +000040# Below are some formats and associated data for reading/writing headers using
41# the struct module. The names and structures of headers/records are those used
42# in the PKWARE description of the ZIP file format:
43# http://www.pkware.com/documents/casestudies/APPNOTE.TXT
44# (URL valid as of January 2008)
Ronald Oussoren143cefb2006-06-15 08:14:18 +000045
# The "end of central directory" structure, magic number, size, and indices
# (section V.I in the format document)
# Layout (little-endian): 4-byte signature, four 16-bit fields, two 32-bit
# fields, one 16-bit field -- eight items, indexed by the _ECD_* names below.
structEndArchive = "<4s4H2LH"
stringEndArchive = "PK\005\006"
sizeEndCentDir = struct.calcsize(structEndArchive)

_ECD_SIGNATURE = 0
_ECD_DISK_NUMBER = 1
_ECD_DISK_START = 2
_ECD_ENTRIES_THIS_DISK = 3
_ECD_ENTRIES_TOTAL = 4
_ECD_SIZE = 5
_ECD_OFFSET = 6
_ECD_COMMENT_SIZE = 7
# These last two indices are not part of the structure as defined in the
# spec, but they are used internally by this module as a convenience
# (_EndRecData appends the comment and the record's file offset).
_ECD_COMMENT = 8
_ECD_LOCATION = 9
63_ECD_LOCATION = 9
64
# The "central directory" structure, magic number, size, and indices
# of entries in the structure (section V.F in the format document)
# Layout (little-endian): 4-byte signature, four bytes, four 16-bit fields,
# three 32-bit fields, five 16-bit fields, two 32-bit fields -- 19 items.
structCentralDir = "<4s4B4HL2L5H2L"
stringCentralDir = "PK\001\002"
sizeCentralDir = struct.calcsize(structCentralDir)

# indexes of entries in the central directory structure
_CD_SIGNATURE = 0
_CD_CREATE_VERSION = 1
_CD_CREATE_SYSTEM = 2
_CD_EXTRACT_VERSION = 3
_CD_EXTRACT_SYSTEM = 4
_CD_FLAG_BITS = 5
_CD_COMPRESS_TYPE = 6
_CD_TIME = 7
_CD_DATE = 8
_CD_CRC = 9
_CD_COMPRESSED_SIZE = 10
_CD_UNCOMPRESSED_SIZE = 11
_CD_FILENAME_LENGTH = 12
_CD_EXTRA_FIELD_LENGTH = 13
_CD_COMMENT_LENGTH = 14
_CD_DISK_NUMBER_START = 15
_CD_INTERNAL_FILE_ATTRIBUTES = 16
_CD_EXTERNAL_FILE_ATTRIBUTES = 17
_CD_LOCAL_HEADER_OFFSET = 18
91
# The "local file header" structure, magic number, size, and indices
# (section V.A in the format document)
# Layout (little-endian): 4-byte signature, two bytes, four 16-bit fields,
# three 32-bit fields, two 16-bit fields -- 12 items.
structFileHeader = "<4s2B4HL2L2H"
stringFileHeader = "PK\003\004"
sizeFileHeader = struct.calcsize(structFileHeader)

# indexes of entries in the local file header structure
_FH_SIGNATURE = 0
_FH_EXTRACT_VERSION = 1
_FH_EXTRACT_SYSTEM = 2
_FH_GENERAL_PURPOSE_FLAG_BITS = 3
_FH_COMPRESSION_METHOD = 4
_FH_LAST_MOD_TIME = 5
_FH_LAST_MOD_DATE = 6
_FH_CRC = 7
_FH_COMPRESSED_SIZE = 8
_FH_UNCOMPRESSED_SIZE = 9
_FH_FILENAME_LENGTH = 10
_FH_EXTRA_FIELD_LENGTH = 11
110
# The "Zip64 end of central directory locator" structure, magic number, and size
# Layout (little-endian): 4-byte signature, 32-bit field, 64-bit field,
# 32-bit field (unpacked by _EndRecData64 as sig, diskno, reloff, disks).
structEndArchive64Locator = "<4sLQL"
stringEndArchive64Locator = "PK\x06\x07"
sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator)

# The "Zip64 end of central directory" record, magic number, size, and indices
# (section V.G in the format document)
# Layout (little-endian): 4-byte signature, one 64-bit field, two 16-bit
# fields, two 32-bit fields, four 64-bit fields -- 10 items.
structEndArchive64 = "<4sQ2H2L4Q"
stringEndArchive64 = "PK\x06\x06"
sizeEndCentDir64 = struct.calcsize(structEndArchive64)

_CD64_SIGNATURE = 0
_CD64_DIRECTORY_RECSIZE = 1
_CD64_CREATE_VERSION = 2
_CD64_EXTRACT_VERSION = 3
_CD64_DISK_NUMBER = 4
_CD64_DISK_NUMBER_START = 5
_CD64_NUMBER_ENTRIES_THIS_DISK = 6
_CD64_NUMBER_ENTRIES_TOTAL = 7
_CD64_DIRECTORY_SIZE = 8
_CD64_OFFSET_START_CENTDIR = 9
132
def _check_zipfile(fp):
    """Return True if an end-of-central-directory record is found in fp.

    An IOError raised while probing the stream is treated as "not a ZIP
    file" and yields False.
    """
    try:
        found = bool(_EndRecData(fp))   # truthy => file has correct magic number
    except IOError:
        found = False
    return found
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000140
def is_zipfile(filename):
    """Quickly see if a file is a ZIP file by checking the magic number.

    The filename argument may be a path, or an already open file or
    file-like object (anything with a "read" attribute).  Returns False
    when the file cannot be opened or read.
    """
    is_zip = False
    try:
        if not hasattr(filename, "read"):
            # A path: open it ourselves and make sure it is closed again.
            with open(filename, "rb") as stream:
                is_zip = _check_zipfile(stream)
        else:
            is_zip = _check_zipfile(fp=filename)
    except IOError:
        pass
    return is_zip
156
def _EndRecData64(fpin, offset, endrec):
    """
    Read the ZIP64 end-of-archive records and use that to update endrec.

    fpin   -- seekable file object positioned anywhere.
    offset -- position of the classic end-of-central-directory record,
              expressed relative to the end of the file (negative).
    endrec -- list built by _EndRecData; updated in place and returned.

    Returns endrec unchanged when no (complete) ZIP64 locator/record is
    present.  Raises BadZipfile for archives spanning multiple disks.
    """
    try:
        fpin.seek(offset - sizeEndCentDir64Locator, 2)
    except IOError:
        # If the seek fails, the file is not large enough to contain a ZIP64
        # end-of-archive record, so just return the end record we were given.
        return endrec

    data = fpin.read(sizeEndCentDir64Locator)
    if len(data) != sizeEndCentDir64Locator:
        # Short read: there is no room for a locator, and struct.unpack
        # would raise struct.error on a buffer of the wrong size.
        return endrec
    sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, data)
    if sig != stringEndArchive64Locator:
        return endrec

    if diskno != 0 or disks != 1:
        raise BadZipfile("zipfiles that span multiple disks are not supported")

    # Assume no 'zip64 extensible data'
    fpin.seek(offset - sizeEndCentDir64Locator - sizeEndCentDir64, 2)
    data = fpin.read(sizeEndCentDir64)
    if len(data) != sizeEndCentDir64:
        # Truncated ZIP64 record; keep the values from the classic record.
        return endrec
    sig, sz, create_version, read_version, disk_num, disk_dir, \
            dircount, dircount2, dirsize, diroffset = \
            struct.unpack(structEndArchive64, data)
    if sig != stringEndArchive64:
        return endrec

    # Update the original endrec using data from the ZIP64 record
    endrec[_ECD_SIGNATURE] = sig
    endrec[_ECD_DISK_NUMBER] = disk_num
    endrec[_ECD_DISK_START] = disk_dir
    endrec[_ECD_ENTRIES_THIS_DISK] = dircount
    endrec[_ECD_ENTRIES_TOTAL] = dircount2
    endrec[_ECD_SIZE] = dirsize
    endrec[_ECD_OFFSET] = diroffset
    return endrec
194
195
def _EndRecData(fpin):
    """Return data from the "End of Central Directory" record, or None.

    The data is a list of the nine items in the ZIP "End of central dir"
    record followed by a tenth item, the file seek offset of this record.

    None is returned when no complete record can be located, including
    when the file is too short or the record is truncated."""

    # Determine file size
    fpin.seek(0, 2)
    filesize = fpin.tell()

    # Check to see if this is ZIP file with no archive comment (the
    # "end of central directory" structure should be the last item in the
    # file if this is the case).
    try:
        fpin.seek(-sizeEndCentDir, 2)
    except IOError:
        return None
    data = fpin.read()
    # The length test guards struct.unpack: some file-like objects clamp a
    # negative seek instead of raising, which yields fewer bytes than asked.
    if (len(data) == sizeEndCentDir and
            data[0:4] == stringEndArchive and
            data[-2:] == "\000\000"):
        # the signature is correct and there's no comment, unpack structure
        endrec = list(struct.unpack(structEndArchive, data))

        # Append a blank comment and record start offset
        endrec.append("")
        endrec.append(filesize - sizeEndCentDir)

        # Try to read the "Zip64 end of central directory" structure
        return _EndRecData64(fpin, -sizeEndCentDir, endrec)

    # Either this is not a ZIP file, or it is a ZIP file with an archive
    # comment.  Search the end of the file for the "end of central directory"
    # record signature. The comment is the last item in the ZIP file and may be
    # up to 64K long.  It is assumed that the "end of central directory" magic
    # number does not appear in the comment.
    maxCommentStart = max(filesize - (1 << 16) - sizeEndCentDir, 0)
    fpin.seek(maxCommentStart, 0)
    data = fpin.read()
    start = data.rfind(stringEndArchive)
    if start >= 0:
        # found the magic number; attempt to unpack and interpret
        recData = data[start:start+sizeEndCentDir]
        if len(recData) != sizeEndCentDir:
            # Signature sits too close to EOF for a whole record: the file
            # is corrupt (or not a ZIP file at all).
            return None
        endrec = list(struct.unpack(structEndArchive, recData))
        commentSize = endrec[_ECD_COMMENT_SIZE] #as claimed by the zip file
        comment = data[start+sizeEndCentDir:start+sizeEndCentDir+commentSize]
        endrec.append(comment)
        endrec.append(maxCommentStart + start)

        # Try to read the "Zip64 end of central directory" structure
        return _EndRecData64(fpin, maxCommentStart + start - filesize,
                             endrec)

    # Unable to find a valid end of central directory structure
    return None
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000250
Fred Drake484d7352000-10-02 21:14:52 +0000251
class ZipInfo (object):
    """Class with attributes describing each file in the ZIP archive."""

    __slots__ = (
            'orig_filename',
            'filename',
            'date_time',
            'compress_type',
            'comment',
            'extra',
            'create_system',
            'create_version',
            'extract_version',
            'reserved',
            'flag_bits',
            'volume',
            'internal_attr',
            'external_attr',
            'header_offset',
            'CRC',
            'compress_size',
            'file_size',
            '_raw_time',
        )

    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
        """Create an entry description with default header values.

        filename  -- member name; cut at the first NUL byte and normalized
                     to use "/" as the directory separator.
        date_time -- (year, month, day, hour, min, sec); the year must be
                     1980 or later, otherwise ValueError is raised.
        """
        self.orig_filename = filename   # Original file name in archive

        # Terminate the file name at the first null byte.  Null bytes in file
        # names are used as tricks by viruses in archives.
        null_byte = filename.find(chr(0))
        if null_byte >= 0:
            filename = filename[0:null_byte]
        # This is used to ensure paths in generated ZIP files always use
        # forward slashes as the directory separator, as required by the
        # ZIP format specification.
        if os.sep != "/" and os.sep in filename:
            filename = filename.replace(os.sep, "/")

        self.filename = filename        # Normalized file name
        self.date_time = date_time      # year, month, day, hour, min, sec

        if date_time[0] < 1980:
            raise ValueError('ZIP does not support timestamps before 1980')

        # Standard values:
        self.compress_type = ZIP_STORED # Type of compression for the file
        self.comment = ""               # Comment for each file
        self.extra = ""                 # ZIP extra data
        if sys.platform == 'win32':
            self.create_system = 0          # System which created ZIP archive
        else:
            # Assume everything else is unix-y
            self.create_system = 3          # System which created ZIP archive
        self.create_version = 20        # Version which created ZIP archive
        self.extract_version = 20       # Version needed to extract archive
        self.reserved = 0               # Must be zero
        self.flag_bits = 0              # ZIP flag bits
        self.volume = 0                 # Volume number of file header
        self.internal_attr = 0          # Internal attributes
        self.external_attr = 0          # External file attributes
        # Other attributes are set by class ZipFile:
        # header_offset         Byte offset to the file header
        # CRC                   CRC-32 of the uncompressed file
        # compress_size         Size of the compressed file
        # file_size             Size of the uncompressed file

    def FileHeader(self):
        """Return the per-file header as a string.

        When either size exceeds ZIP64_LIMIT a ZIP64 extra record is
        appended, the 32-bit size fields are set to 0xffffffff, and
        extract_version/create_version are raised to at least 45.
        """
        dt = self.date_time
        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
        if self.flag_bits & 0x08:
            # Set these to zero because we write them after the file data
            CRC = compress_size = file_size = 0
        else:
            CRC = self.CRC
            compress_size = self.compress_size
            file_size = self.file_size

        extra = self.extra

        if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
            # File is larger than what fits into a 4 byte integer,
            # fall back to the ZIP64 extension
            fmt = '<HHQQ'
            extra = extra + struct.pack(fmt,
                    1, struct.calcsize(fmt)-4, file_size, compress_size)
            file_size = 0xffffffff
            compress_size = 0xffffffff
            self.extract_version = max(45, self.extract_version)
            # Bump create_version from its own value, not from
            # extract_version, so the two fields stay independent.
            self.create_version = max(45, self.create_version)

        filename, flag_bits = self._encodeFilenameFlags()
        header = struct.pack(structFileHeader, stringFileHeader,
                 self.extract_version, self.reserved, flag_bits,
                 self.compress_type, dostime, dosdate, CRC,
                 compress_size, file_size,
                 len(filename), len(extra))
        return header + filename + extra

    def _encodeFilenameFlags(self):
        """Return (encoded filename, flag bits) for writing a header.

        Unicode names are written as ASCII when possible; otherwise they
        are encoded as UTF-8 and flag bit 0x800 is set.
        """
        if isinstance(self.filename, unicode):
            try:
                return self.filename.encode('ascii'), self.flag_bits
            except UnicodeEncodeError:
                return self.filename.encode('utf-8'), self.flag_bits | 0x800
        else:
            return self.filename, self.flag_bits

    def _decodeFilename(self):
        """Return the filename, decoded from UTF-8 if flag bit 0x800 is set."""
        if self.flag_bits & 0x800:
            return self.filename.decode('utf-8')
        else:
            return self.filename

    def _decodeExtra(self):
        """Parse self.extra and apply the ZIP64 record (type 1), if any.

        The extra field is a sequence of records, each a 4-byte
        (type, length) header followed by `length` payload bytes.  For the
        ZIP64 record, file_size, compress_size and header_offset are
        replaced by the 64-bit values whenever they hold the 0xffffffff
        sentinel.  Raises RuntimeError for a ZIP64 record of unexpected
        length.
        """
        # Try to decode the extra field.
        extra = self.extra
        unpack = struct.unpack
        # Stop once fewer than 4 bytes remain: that cannot be a record
        # header, and unpacking it would raise struct.error.
        while len(extra) >= 4:
            tp, ln = unpack('<HH', extra[:4])
            if tp == 1:
                if ln >= 24:
                    counts = unpack('<QQQ', extra[4:28])
                elif ln == 16:
                    counts = unpack('<QQ', extra[4:20])
                elif ln == 8:
                    counts = unpack('<Q', extra[4:12])
                elif ln == 0:
                    counts = ()
                else:
                    raise RuntimeError("Corrupt extra field %s"%(ln,))

                idx = 0

                # ZIP64 extension (large files and/or large archives)
                if self.file_size in (0xffffffffffffffff, 0xffffffff):
                    self.file_size = counts[idx]
                    idx += 1

                if self.compress_size == 0xFFFFFFFF:
                    self.compress_size = counts[idx]
                    idx += 1

                if self.header_offset == 0xffffffff:
                    self.header_offset = counts[idx]
                    idx+=1

            extra = extra[ln+4:]
Tim Petersa608bb22006-06-15 18:06:29 +0000403
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000404
Martin v. Löwisc6d626e2007-02-13 09:49:38 +0000405class _ZipDecrypter:
406 """Class to handle decryption of files stored within a ZIP archive.
407
408 ZIP supports a password-based form of encryption. Even though known
409 plaintext attacks have been found against it, it is still useful
Gregory P. Smithda407232008-01-20 01:32:00 +0000410 to be able to get data out of such a file.
Martin v. Löwisc6d626e2007-02-13 09:49:38 +0000411
412 Usage:
413 zd = _ZipDecrypter(mypwd)
414 plain_char = zd(cypher_char)
415 plain_text = map(zd, cypher_text)
416 """
417
418 def _GenerateCRCTable():
419 """Generate a CRC-32 table.
420
421 ZIP encryption uses the CRC32 one-byte primitive for scrambling some
422 internal keys. We noticed that a direct implementation is faster than
423 relying on binascii.crc32().
424 """
425 poly = 0xedb88320
426 table = [0] * 256
427 for i in range(256):
428 crc = i
429 for j in range(8):
430 if crc & 1:
431 crc = ((crc >> 1) & 0x7FFFFFFF) ^ poly
432 else:
433 crc = ((crc >> 1) & 0x7FFFFFFF)
434 table[i] = crc
435 return table
436 crctable = _GenerateCRCTable()
437
438 def _crc32(self, ch, crc):
439 """Compute the CRC32 primitive on one byte."""
440 return ((crc >> 8) & 0xffffff) ^ self.crctable[(crc ^ ord(ch)) & 0xff]
441
442 def __init__(self, pwd):
443 self.key0 = 305419896
444 self.key1 = 591751049
445 self.key2 = 878082192
446 for p in pwd:
447 self._UpdateKeys(p)
448
449 def _UpdateKeys(self, c):
450 self.key0 = self._crc32(c, self.key0)
451 self.key1 = (self.key1 + (self.key0 & 255)) & 4294967295
452 self.key1 = (self.key1 * 134775813 + 1) & 4294967295
453 self.key2 = self._crc32(chr((self.key1 >> 24) & 255), self.key2)
454
455 def __call__(self, c):
456 """Decrypt a single character."""
457 c = ord(c)
458 k = self.key2 | 2
459 c = c ^ (((k * (k^1)) >> 8) & 255)
460 c = chr(c)
461 self._UpdateKeys(c)
462 return c
463
Ezio Melotti9e949722012-11-18 13:18:06 +0200464
# Names of ZIP compression methods keyed by method number.  ZipExtFile uses
# this table only to build the NotImplementedError message for methods other
# than ZIP_STORED and ZIP_DEFLATED.
compressor_names = {
    0: 'store',
    1: 'shrink',
    2: 'reduce',
    3: 'reduce',
    4: 'reduce',
    5: 'reduce',
    6: 'implode',
    7: 'tokenize',
    8: 'deflate',
    9: 'deflate64',
    10: 'implode',
    12: 'bzip2',
    14: 'lzma',
    18: 'terse',
    19: 'lz77',
    97: 'wavpack',
    98: 'ppmd',
}
484
485
class ZipExtFile(io.BufferedIOBase):
    """File-like object for reading an archive member.
       Is returned by ZipFile.open().
    """

    # Max size supported by decompressor.  The parentheses matter: without
    # them "1 << 31 - 1" parses as "1 << 30".
    MAX_N = (1 << 31) - 1

    # Read from compressed files in 4k blocks.
    MIN_READ_SIZE = 4096

    # Search for universal newlines or line chunks.
    PATTERN = re.compile(r'^(?P<chunk>[^\r\n]+)|(?P<newline>\n|\r\n?)')

    def __init__(self, fileobj, mode, zipinfo, decrypter=None,
            close_fileobj=False):
        """Wrap fileobj, positioned at the start of the member's data.

        fileobj       -- object whose read() yields the raw member bytes.
        mode          -- open mode; a 'U' in it enables universal newlines.
        zipinfo       -- entry description; compress_type, compress_size,
                         filename and (if present) CRC are read from it.
        decrypter     -- optional callable applied to every raw character.
        close_fileobj -- if true, close() also closes fileobj.

        Raises NotImplementedError for compression methods other than
        ZIP_STORED and ZIP_DEFLATED.
        """
        self._fileobj = fileobj
        self._decrypter = decrypter
        self._close_fileobj = close_fileobj

        self._compress_type = zipinfo.compress_type
        self._compress_size = zipinfo.compress_size
        self._compress_left = zipinfo.compress_size

        if self._compress_type == ZIP_DEFLATED:
            self._decompressor = zlib.decompressobj(-15)
        elif self._compress_type != ZIP_STORED:
            descr = compressor_names.get(self._compress_type)
            if descr:
                raise NotImplementedError("compression type %d (%s)" % (self._compress_type, descr))
            else:
                raise NotImplementedError("compression type %d" % (self._compress_type,))
        self._unconsumed = ''

        self._readbuffer = ''
        self._offset = 0

        self._universal = 'U' in mode
        self.newlines = None

        # Adjust read size for encrypted files since the first 12 bytes
        # are for the encryption/password information.
        if self._decrypter is not None:
            self._compress_left -= 12

        self.mode = mode
        self.name = zipinfo.filename

        if hasattr(zipinfo, 'CRC'):
            self._expected_crc = zipinfo.CRC
            self._running_crc = crc32(b'') & 0xffffffff
        else:
            self._expected_crc = None

    def readline(self, limit=-1):
        """Read and return a line from the stream.

        If limit is specified, at most limit bytes will be read.
        """

        if not self._universal and limit < 0:
            # Shortcut common case - newline found in buffer.
            i = self._readbuffer.find('\n', self._offset) + 1
            if i > 0:
                line = self._readbuffer[self._offset: i]
                self._offset = i
                return line

        if not self._universal:
            return io.BufferedIOBase.readline(self, limit)

        line = ''
        while limit < 0 or len(line) < limit:
            readahead = self.peek(2)
            if readahead == '':
                return line

            #
            # Search for universal newlines or line chunks.
            #
            # The pattern returns either a line chunk or a newline, but not
            # both. Combined with peek(2), we are assured that the sequence
            # '\r\n' is always retrieved completely and never split into
            # separate newlines - '\r', '\n' due to coincidental readaheads.
            #
            match = self.PATTERN.search(readahead)
            newline = match.group('newline')
            if newline is not None:
                if self.newlines is None:
                    self.newlines = []
                if newline not in self.newlines:
                    self.newlines.append(newline)
                self._offset += len(newline)
                return line + '\n'

            chunk = match.group('chunk')
            if limit >= 0:
                chunk = chunk[: limit - len(line)]

            self._offset += len(chunk)
            line += chunk

        return line

    def peek(self, n=1):
        """Returns buffered bytes without advancing the position."""
        if n > len(self._readbuffer) - self._offset:
            # Not enough buffered: read() fills the buffer, then the offset
            # is rewound so the data is still unread from the caller's view.
            chunk = self.read(n)
            self._offset -= len(chunk)

        # Return up to 512 bytes to reduce allocation overhead for tight loops.
        return self._readbuffer[self._offset: self._offset + 512]

    def readable(self):
        """Report that this stream supports reading."""
        return True

    def read(self, n=-1):
        """Read and return up to n bytes.
        If the argument is omitted, None, or negative, data is read and returned until EOF is reached.
        """
        buf = ''
        if n is None:
            n = -1
        while True:
            if n < 0:
                data = self.read1(n)
            elif n > len(buf):
                data = self.read1(n - len(buf))
            else:
                return buf
            if len(data) == 0:
                return buf
            buf += data

    def _update_crc(self, newdata, eof):
        """Fold newdata into the running CRC; verify it when eof is true.

        Raises BadZipfile if the final CRC differs from the expected one.
        """
        # Update the CRC using the given data.
        if self._expected_crc is None:
            # No need to compute the CRC if we don't have a reference value
            return
        self._running_crc = crc32(newdata, self._running_crc) & 0xffffffff
        # Check the CRC if we're at the end of the file
        if eof and self._running_crc != self._expected_crc:
            raise BadZipfile("Bad CRC-32 for file %r" % self.name)

    def read1(self, n):
        """Read up to n bytes with at most one read() system call.

        A negative n or None means "as much as possible" (MAX_N).
        """

        # Simplify algorithm (branching) by transforming negative n to large n.
        # None is tested first so it is never compared with an integer.
        if n is None or n < 0:
            n = self.MAX_N

        # Bytes available in read buffer.
        len_readbuffer = len(self._readbuffer) - self._offset

        # Read from file.
        if self._compress_left > 0 and n > len_readbuffer + len(self._unconsumed):
            nbytes = n - len_readbuffer - len(self._unconsumed)
            nbytes = max(nbytes, self.MIN_READ_SIZE)
            nbytes = min(nbytes, self._compress_left)

            data = self._fileobj.read(nbytes)
            self._compress_left -= len(data)

            if data and self._decrypter is not None:
                data = ''.join(map(self._decrypter, data))

            if self._compress_type == ZIP_STORED:
                self._update_crc(data, eof=(self._compress_left==0))
                self._readbuffer = self._readbuffer[self._offset:] + data
                self._offset = 0
            else:
                # Prepare deflated bytes for decompression.
                self._unconsumed += data

        # Handle unconsumed data.
        if (len(self._unconsumed) > 0 and n > len_readbuffer and
            self._compress_type == ZIP_DEFLATED):
            data = self._decompressor.decompress(
                self._unconsumed,
                max(n - len_readbuffer, self.MIN_READ_SIZE)
            )

            self._unconsumed = self._decompressor.unconsumed_tail
            eof = len(self._unconsumed) == 0 and self._compress_left == 0
            if eof:
                data += self._decompressor.flush()

            self._update_crc(data, eof=eof)
            self._readbuffer = self._readbuffer[self._offset:] + data
            self._offset = 0

        # Read from buffer.
        data = self._readbuffer[self._offset: self._offset + n]
        self._offset += len(data)
        return data

    def close(self):
        """Close the stream, and the underlying file object if we own it."""
        try:
            if self._close_fileobj:
                self._fileobj.close()
        finally:
            super(ZipExtFile, self).close()
Tim Petersea5962f2007-03-12 18:07:52 +0000688
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000689
class ZipFile(object):
    """ Class with methods to open, read, write, close, list zip files.

    z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=False)

    file: Either the path to the file, or a file-like object.
          If it is a path, the file will be opened and closed by ZipFile.
    mode: The mode can be either read "r", write "w" or append "a".
    compression: ZIP_STORED (no compression) or ZIP_DEFLATED (requires zlib).
    allowZip64: if True ZipFile will create files with ZIP64 extensions when
                needed, otherwise it will raise an exception when this would
                be necessary.

    """

    fp = None                   # Set here since __del__ checks it

    def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=False):
        """Open the ZIP file with mode read "r", write "w" or append "a".

        Raises RuntimeError for an unknown mode or compression method, or
        when ZIP_DEFLATED is requested but zlib is unavailable.  Errors
        raised while reading an existing archive's directory propagate
        after any file opened here has been closed again.
        """
        if mode not in ("r", "w", "a"):
            raise RuntimeError('ZipFile() requires mode "r", "w", or "a"')

        if compression == ZIP_STORED:
            pass
        elif compression == ZIP_DEFLATED:
            if not zlib:
                raise RuntimeError(
                    "Compression requires the (missing) zlib module")
        else:
            raise RuntimeError("That compression method is not supported")

        self._allowZip64 = allowZip64
        self._didModify = False
        self.debug = 0  # Level of printing: 0 through 3
        self.NameToInfo = {}    # Find file info given name
        self.filelist = []      # List of ZipInfo instances for archive
        self.compression = compression  # Method of compression
        self.mode = key = mode.replace('b', '')[0]
        self.pwd = None
        self._comment = ''

        # Check if we were passed a file-like object
        if isinstance(file, basestring):
            self._filePassed = 0
            self.filename = file
            modeDict = {'r' : 'rb', 'w': 'wb', 'a' : 'r+b'}
            try:
                self.fp = open(file, modeDict[mode])
            except IOError:
                if mode == 'a':
                    # Appending to a file that cannot be opened "r+b"
                    # (e.g. it does not exist yet): fall back to creating it.
                    mode = key = 'w'
                    self.fp = open(file, modeDict[mode])
                else:
                    raise
        else:
            self._filePassed = 1
            self.fp = file
            self.filename = getattr(file, 'name', None)

        try:
            if key == 'r':
                self._RealGetContents()
            elif key == 'w':
                # set the modified flag so central directory gets written
                # even if no files are added to the archive
                self._didModify = True
            elif key == 'a':
                try:
                    # See if file is a zip file
                    self._RealGetContents()
                    # seek to start of directory and overwrite
                    self.fp.seek(self.start_dir, 0)
                except BadZipfile:
                    # file is not a zip file, just append
                    self.fp.seek(0, 2)

                    # set the modified flag so central directory gets written
                    # even if no files are added to the archive
                    self._didModify = True
            else:
                raise RuntimeError('Mode must be "r", "w" or "a"')
        except:
            # Deliberately bare: whatever went wrong, do not leave a file
            # that we opened ourselves dangling; the error is re-raised.
            fp = self.fp
            self.fp = None
            if not self._filePassed:
                fp.close()
            raise

    def __enter__(self):
        """Support the ``with`` statement; returns this ZipFile."""
        return self

    def __exit__(self, type, value, traceback):
        """Close the archive when the ``with`` block is left."""
        self.close()

    def _RealGetContents(self):
        """Read in the table of contents for the ZIP file.

        Fills self.filelist and self.NameToInfo from the central directory
        and records self.start_dir and the archive comment.  Raises
        BadZipfile when the end-of-archive record cannot be found or the
        central directory is malformed or truncated.
        """
        fp = self.fp
        try:
            endrec = _EndRecData(fp)
        except IOError:
            raise BadZipfile("File is not a zip file")
        if not endrec:
            raise BadZipfile("File is not a zip file")
        if self.debug > 1:
            print(endrec)
        size_cd = endrec[_ECD_SIZE]             # bytes in central directory
        offset_cd = endrec[_ECD_OFFSET]         # offset of central directory
        self._comment = endrec[_ECD_COMMENT]    # archive comment

        # "concat" is zero, unless zip was concatenated to another file
        concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
        if endrec[_ECD_SIGNATURE] == stringEndArchive64:
            # If Zip64 extension structures are present, account for them
            concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)

        if self.debug > 2:
            inferred = concat + offset_cd
            print("given, inferred, offset %s %s %s"
                  % (offset_cd, inferred, concat))
        # self.start_dir:  Position of start of central directory
        self.start_dir = offset_cd + concat
        fp.seek(self.start_dir, 0)
        data = fp.read(size_cd)
        # Parse the directory from memory rather than from the file.
        fp = io.BytesIO(data)
        total = 0
        while total < size_cd:
            centdir = fp.read(sizeCentralDir)
            if centdir[0:4] != stringCentralDir:
                raise BadZipfile("Bad magic number for central directory")
            if len(centdir) != sizeCentralDir:
                # A short read would otherwise surface as struct.error.
                raise BadZipfile("Truncated central directory")
            centdir = struct.unpack(structCentralDir, centdir)
            if self.debug > 2:
                print(centdir)
            filename = fp.read(centdir[_CD_FILENAME_LENGTH])
            # Create ZipInfo instance to store file information
            x = ZipInfo(filename)
            x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
            x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
            x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
            (x.create_version, x.create_system, x.extract_version, x.reserved,
                x.flag_bits, x.compress_type, t, d,
                x.CRC, x.compress_size, x.file_size) = centdir[1:12]
            x.volume, x.internal_attr, x.external_attr = centdir[15:18]
            # Convert date/time code to (year, month, day, hour, min, sec)
            x._raw_time = t
            x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
                                     t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )

            x._decodeExtra()
            x.header_offset = x.header_offset + concat
            x.filename = x._decodeFilename()
            self.filelist.append(x)
            self.NameToInfo[x.filename] = x

            # update total bytes read from central directory
            total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH]
                     + centdir[_CD_EXTRA_FIELD_LENGTH]
                     + centdir[_CD_COMMENT_LENGTH])

            if self.debug > 2:
                print("total %s" % total)

    def namelist(self):
        """Return a list of file names in the archive."""
        return [data.filename for data in self.filelist]

    def infolist(self):
        """Return a list of class ZipInfo instances for files in the
        archive."""
        return self.filelist

    def printdir(self):
        """Print a table of contents for the zip file."""
        print("%-46s %19s %12s" % ("File Name", "Modified    ", "Size"))
        for zinfo in self.filelist:
            date = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time[:6]
            print("%-46s %s %12d" % (zinfo.filename, date, zinfo.file_size))

    def testzip(self):
        """Read all the files and check the CRC.

        Returns the name of the first member whose read raises BadZipfile,
        or None when every member reads cleanly.
        """
        chunk_size = 2 ** 20
        for zinfo in self.filelist:
            try:
                # Read by chunks, to avoid an OverflowError or a
                # MemoryError with very large embedded files.
                with self.open(zinfo.filename, "r") as f:
                    while f.read(chunk_size):     # Check CRC-32
                        pass
            except BadZipfile:
                return zinfo.filename

    def getinfo(self, name):
        """Return the instance of ZipInfo given 'name'.

        Raises KeyError when the archive has no member called 'name'.
        """
        info = self.NameToInfo.get(name)
        if info is None:
            raise KeyError(
                'There is no item named %r in the archive' % name)

        return info

    def setpassword(self, pwd):
        """Set default password for encrypted files."""
        self.pwd = pwd

    @property
    def comment(self):
        """The comment text associated with the ZIP file."""
        return self._comment

    @comment.setter
    def comment(self, comment):
        # check for valid comment length; a comment of exactly
        # ZIP_MAX_COMMENT bytes is still legal and is stored untouched
        if len(comment) > ZIP_MAX_COMMENT:
            if self.debug:
                print('Archive comment is too long; truncating to %d bytes'
                      % ZIP_MAX_COMMENT)
            comment = comment[:ZIP_MAX_COMMENT]
        self._comment = comment
        self._didModify = True

    def read(self, name, pwd=None):
        """Return file bytes (as a string) for name."""
        # Close the member stream promptly: when the archive was opened by
        # path, every open() call holds its own OS-level file handle.
        with self.open(name, "r", pwd) as member:
            return member.read()

    def open(self, name, mode="r", pwd=None):
        """Return file-like object for 'name'.

        'name' is a member name or a ZipInfo instance, 'mode' must be "r",
        "U" or "rU", and 'pwd' overrides the default password for
        encrypted members.  Raises RuntimeError for a bad mode, a closed
        archive, or a missing/incorrect password; KeyError for an unknown
        member name; BadZipfile for a corrupt or truncated local header.
        """
        if mode not in ("r", "U", "rU"):
            raise RuntimeError('open() requires mode "r", "U", or "rU"')
        if not self.fp:
            raise RuntimeError(
                "Attempt to read ZIP archive that was already closed")

        # Only open a new file for instances where we were not
        # given a file object in the constructor
        if self._filePassed:
            zef_file = self.fp
            should_close = False
        else:
            zef_file = open(self.filename, 'rb')
            should_close = True

        try:
            # Make sure we have an info object
            if isinstance(name, ZipInfo):
                # 'name' is already an info object
                zinfo = name
            else:
                # Get info object for name
                zinfo = self.getinfo(name)

            zef_file.seek(zinfo.header_offset, 0)

            # Skip the file header:
            fheader = zef_file.read(sizeFileHeader)
            if fheader[0:4] != stringFileHeader:
                raise BadZipfile("Bad magic number for file header")
            if len(fheader) != sizeFileHeader:
                # A short read would otherwise surface as struct.error.
                raise BadZipfile("Truncated file header")

            fheader = struct.unpack(structFileHeader, fheader)
            fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
            if fheader[_FH_EXTRA_FIELD_LENGTH]:
                zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])

            if fname != zinfo.orig_filename:
                raise BadZipfile(
                    'File name in directory "%s" and header "%s" differ.' % (
                        zinfo.orig_filename, fname))

            # check for encrypted flag & handle password
            is_encrypted = zinfo.flag_bits & 0x1
            zd = None
            if is_encrypted:
                if not pwd:
                    pwd = self.pwd
                if not pwd:
                    raise RuntimeError("File %s is encrypted, "
                                       "password required for extraction"
                                       % name)

                zd = _ZipDecrypter(pwd)
                # The first 12 bytes in the cypher stream is an encryption header
                #  used to strengthen the algorithm. The first 11 bytes are
                #  completely random, while the 12th contains the MSB of the CRC,
                #  or the MSB of the file time depending on the header type
                #  and is used to check the correctness of the password.
                crypt_header = zef_file.read(12)
                h = [zd(c) for c in crypt_header[0:12]]
                if zinfo.flag_bits & 0x8:
                    # compare against the file type from extended local headers
                    check_byte = (zinfo._raw_time >> 8) & 0xff
                else:
                    # compare against the CRC otherwise
                    check_byte = (zinfo.CRC >> 24) & 0xff
                if ord(h[11]) != check_byte:
                    raise RuntimeError("Bad password for file", name)

            return ZipExtFile(zef_file, mode, zinfo, zd,
                              close_fileobj=should_close)
        except:
            # Deliberately bare: release the private handle on any failure,
            # then let the original error propagate.
            if should_close:
                zef_file.close()
            raise

    def extract(self, member, path=None, pwd=None):
        """Extract a member from the archive to the current working directory,
           using its full name. Its file information is extracted as accurately
           as possible. `member' may be a filename or a ZipInfo object. You can
           specify a different directory using `path'.
        """
        if not isinstance(member, ZipInfo):
            member = self.getinfo(member)

        if path is None:
            path = os.getcwd()

        return self._extract_member(member, path, pwd)

    def extractall(self, path=None, members=None, pwd=None):
        """Extract all members from the archive to the current working
           directory. `path' specifies a different directory to extract to.
           `members' is optional and must be a subset of the list returned
           by namelist().
        """
        if members is None:
            members = self.namelist()

        for zipinfo in members:
            self.extract(zipinfo, path, pwd)

    def _extract_member(self, member, targetpath, pwd):
        """Extract the ZipInfo object 'member' to a physical
           file on the path targetpath.

           Member names come from the archive and are untrusted: absolute
           names, drive letters and "." / ".." components are dropped so
           the result always lies inside 'targetpath'.  Returns the path
           that was created.
        """
        # Strip trailing path separator, unless it represents the root.
        if (targetpath[-1:] in (os.path.sep, os.path.altsep)
            and len(os.path.splitdrive(targetpath)[1]) > 1):
            targetpath = targetpath[:-1]

        # build the destination pathname, replacing
        # forward slashes to platform specific separators.
        arcname = member.filename.replace('/', os.path.sep)
        if os.path.altsep:
            arcname = arcname.replace(os.path.altsep, os.path.sep)
        # interpret absolute pathname as relative, remove drive letter,
        # redundant separators, "." and ".." components.
        arcname = os.path.splitdrive(arcname)[1]
        arcname = os.path.sep.join(
            part for part in arcname.split(os.path.sep)
            if part not in ('', os.path.curdir, os.path.pardir))

        targetpath = os.path.join(targetpath, arcname)
        targetpath = os.path.normpath(targetpath)

        # Create all upper directories if necessary.
        upperdirs = os.path.dirname(targetpath)
        if upperdirs and not os.path.exists(upperdirs):
            os.makedirs(upperdirs)

        # A trailing slash in the archive name marks a directory entry.
        if member.filename.endswith('/'):
            if not os.path.isdir(targetpath):
                os.mkdir(targetpath)
            return targetpath

        with self.open(member, pwd=pwd) as source, \
             open(targetpath, "wb") as target:
            shutil.copyfileobj(source, target)

        return targetpath

    def _writecheck(self, zinfo):
        """Check for errors before writing a file to the archive.

        Raises RuntimeError for a read-only or closed archive or an
        unusable compression method, and LargeZipFile when ZIP64 would be
        required but is not allowed.
        """
        if zinfo.filename in self.NameToInfo:
            if self.debug:      # Warning for duplicate names
                print("Duplicate name: %s" % zinfo.filename)
        if self.mode not in ("w", "a"):
            raise RuntimeError('write() requires mode "w" or "a"')
        if not self.fp:
            raise RuntimeError(
                "Attempt to write ZIP archive that was already closed")
        if zinfo.compress_type == ZIP_DEFLATED and not zlib:
            raise RuntimeError(
                "Compression requires the (missing) zlib module")
        if zinfo.compress_type not in (ZIP_STORED, ZIP_DEFLATED):
            raise RuntimeError(
                "That compression method is not supported")
        if zinfo.file_size > ZIP64_LIMIT:
            if not self._allowZip64:
                raise LargeZipFile("Filesize would require ZIP64 extensions")
        if zinfo.header_offset > ZIP64_LIMIT:
            if not self._allowZip64:
                raise LargeZipFile("Zipfile size would require ZIP64 extensions")

    def write(self, filename, arcname=None, compress_type=None):
        """Put the bytes from filename into the archive under the name
        arcname.

        'arcname' defaults to 'filename' with any drive letter and leading
        separators removed; 'compress_type' defaults to the archive's
        compression method.  Directories are stored as empty entries whose
        name ends in "/".
        """
        if not self.fp:
            raise RuntimeError(
                  "Attempt to write to ZIP archive that was already closed")

        st = os.stat(filename)
        isdir = stat.S_ISDIR(st.st_mode)
        mtime = time.localtime(st.st_mtime)
        date_time = mtime[0:6]
        # Create ZipInfo instance to store file information
        if arcname is None:
            arcname = filename
        arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
        while arcname[0] in (os.sep, os.altsep):
            arcname = arcname[1:]
        if isdir:
            arcname += '/'
        zinfo = ZipInfo(arcname, date_time)
        zinfo.external_attr = (st[0] & 0xFFFF) << 16      # Unix attributes
        if compress_type is None:
            zinfo.compress_type = self.compression
        else:
            zinfo.compress_type = compress_type

        zinfo.file_size = st.st_size
        zinfo.flag_bits = 0x00
        zinfo.header_offset = self.fp.tell()    # Start of header bytes

        self._writecheck(zinfo)
        self._didModify = True

        if isdir:
            zinfo.file_size = 0
            zinfo.compress_size = 0
            zinfo.CRC = 0
            self.filelist.append(zinfo)
            self.NameToInfo[zinfo.filename] = zinfo
            self.fp.write(zinfo.FileHeader())
            return

        with open(filename, "rb") as fp:
            # Must overwrite CRC and sizes with correct data later
            zinfo.CRC = CRC = 0
            zinfo.compress_size = compress_size = 0
            zinfo.file_size = file_size = 0
            self.fp.write(zinfo.FileHeader())
            if zinfo.compress_type == ZIP_DEFLATED:
                cmpr = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                     zlib.DEFLATED, -15)
            else:
                cmpr = None
            while 1:
                buf = fp.read(1024 * 8)
                if not buf:
                    break
                file_size = file_size + len(buf)
                CRC = crc32(buf, CRC) & 0xffffffff
                if cmpr:
                    buf = cmpr.compress(buf)
                    compress_size = compress_size + len(buf)
                self.fp.write(buf)
        if cmpr:
            buf = cmpr.flush()
            compress_size = compress_size + len(buf)
            self.fp.write(buf)
            zinfo.compress_size = compress_size
        else:
            zinfo.compress_size = file_size
        zinfo.CRC = CRC
        zinfo.file_size = file_size
        # Seek backwards and write CRC and file sizes
        position = self.fp.tell()       # Preserve current position in file
        self.fp.seek(zinfo.header_offset + 14, 0)
        self.fp.write(struct.pack("<LLL", zinfo.CRC, zinfo.compress_size,
              zinfo.file_size))
        self.fp.seek(position, 0)
        self.filelist.append(zinfo)
        self.NameToInfo[zinfo.filename] = zinfo

    def writestr(self, zinfo_or_arcname, bytes, compress_type=None):
        """Write a file into the archive.  The contents is the string
        'bytes'.  'zinfo_or_arcname' is either a ZipInfo instance or
        the name of the file in the archive.  'compress_type', when given,
        overrides the compression method for this entry."""
        if not isinstance(zinfo_or_arcname, ZipInfo):
            zinfo = ZipInfo(filename=zinfo_or_arcname,
                            date_time=time.localtime(time.time())[:6])

            zinfo.compress_type = self.compression
            zinfo.external_attr = 0o600 << 16
        else:
            zinfo = zinfo_or_arcname

        if not self.fp:
            raise RuntimeError(
                  "Attempt to write to ZIP archive that was already closed")

        if compress_type is not None:
            zinfo.compress_type = compress_type

        zinfo.file_size = len(bytes)            # Uncompressed size
        zinfo.header_offset = self.fp.tell()    # Start of header bytes
        self._writecheck(zinfo)
        self._didModify = True
        zinfo.CRC = crc32(bytes) & 0xffffffff       # CRC-32 checksum
        if zinfo.compress_type == ZIP_DEFLATED:
            co = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                 zlib.DEFLATED, -15)
            bytes = co.compress(bytes) + co.flush()
            zinfo.compress_size = len(bytes)    # Compressed size
        else:
            zinfo.compress_size = zinfo.file_size
        self.fp.write(zinfo.FileHeader())
        self.fp.write(bytes)
        self.fp.flush()
        if zinfo.flag_bits & 0x08:
            # Write CRC and file sizes after the file data
            self.fp.write(struct.pack("<LLL", zinfo.CRC, zinfo.compress_size,
                  zinfo.file_size))
        self.filelist.append(zinfo)
        self.NameToInfo[zinfo.filename] = zinfo

    def __del__(self):
        """Call the "close()" method in case the user forgot."""
        self.close()

    def close(self):
        """Close the file, and for mode "w" and "a" write the ending
        records.

        Calling close() on an already closed archive does nothing.  A file
        that ZipFile opened itself is closed even when writing the ending
        records fails.
        """
        if self.fp is None:
            return

        try:
            if self.mode in ("w", "a") and self._didModify: # write ending records
                count = 0
                pos1 = self.fp.tell()
                for zinfo in self.filelist:         # write central directory
                    count = count + 1
                    dt = zinfo.date_time
                    dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
                    dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
                    # Values too large for the classic 32-bit fields are
                    # collected here and emitted as a ZIP64 extra field.
                    extra = []
                    if zinfo.file_size > ZIP64_LIMIT \
                            or zinfo.compress_size > ZIP64_LIMIT:
                        extra.append(zinfo.file_size)
                        extra.append(zinfo.compress_size)
                        file_size = 0xffffffff
                        compress_size = 0xffffffff
                    else:
                        file_size = zinfo.file_size
                        compress_size = zinfo.compress_size

                    if zinfo.header_offset > ZIP64_LIMIT:
                        extra.append(zinfo.header_offset)
                        header_offset = 0xffffffff
                    else:
                        header_offset = zinfo.header_offset

                    extra_data = zinfo.extra
                    if extra:
                        # Append a ZIP64 field to the extra's
                        extra_data = struct.pack(
                                '<HH' + 'Q'*len(extra),
                                1, 8*len(extra), *extra) + extra_data

                        extract_version = max(45, zinfo.extract_version)
                        create_version = max(45, zinfo.create_version)
                    else:
                        extract_version = zinfo.extract_version
                        create_version = zinfo.create_version

                    try:
                        filename, flag_bits = zinfo._encodeFilenameFlags()
                        centdir = struct.pack(structCentralDir,
                        stringCentralDir, create_version,
                        zinfo.create_system, extract_version, zinfo.reserved,
                        flag_bits, zinfo.compress_type, dostime, dosdate,
                        zinfo.CRC, compress_size, file_size,
                        len(filename), len(extra_data), len(zinfo.comment),
                        0, zinfo.internal_attr, zinfo.external_attr,
                        header_offset)
                    except DeprecationWarning:
                        # Dump the offending field values to stderr to help
                        # diagnose the failure, then re-raise.
                        debug_fields = (structCentralDir,
                        stringCentralDir, create_version,
                        zinfo.create_system, extract_version, zinfo.reserved,
                        zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
                        zinfo.CRC, compress_size, file_size,
                        len(zinfo.filename), len(extra_data), len(zinfo.comment),
                        0, zinfo.internal_attr, zinfo.external_attr,
                        header_offset)
                        sys.stderr.write(repr(debug_fields) + "\n")
                        raise
                    self.fp.write(centdir)
                    self.fp.write(filename)
                    self.fp.write(extra_data)
                    self.fp.write(zinfo.comment)

                pos2 = self.fp.tell()
                # Write end-of-zip-archive record
                centDirCount = count
                centDirSize = pos2 - pos1
                centDirOffset = pos1
                if (centDirCount >= ZIP_FILECOUNT_LIMIT or
                    centDirOffset > ZIP64_LIMIT or
                    centDirSize > ZIP64_LIMIT):
                    # Need to write the ZIP64 end-of-archive records
                    zip64endrec = struct.pack(
                            structEndArchive64, stringEndArchive64,
                            44, 45, 45, 0, 0, centDirCount, centDirCount,
                            centDirSize, centDirOffset)
                    self.fp.write(zip64endrec)

                    zip64locrec = struct.pack(
                            structEndArchive64Locator,
                            stringEndArchive64Locator, 0, pos2, 1)
                    self.fp.write(zip64locrec)
                    # The classic record carries saturated placeholders;
                    # the real values live in the ZIP64 record above.
                    centDirCount = min(centDirCount, 0xFFFF)
                    centDirSize = min(centDirSize, 0xFFFFFFFF)
                    centDirOffset = min(centDirOffset, 0xFFFFFFFF)

                endrec = struct.pack(structEndArchive, stringEndArchive,
                                    0, 0, centDirCount, centDirCount,
                                    centDirSize, centDirOffset, len(self._comment))
                self.fp.write(endrec)
                self.fp.write(self._comment)
                self.fp.flush()
        finally:
            fp = self.fp
            self.fp = None
            if not self._filePassed:
                fp.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001309
1310
1311class PyZipFile(ZipFile):
Fred Drake484d7352000-10-02 21:14:52 +00001312 """Class to create ZIP archives with Python library files and packages."""
1313
def writepy(self, pathname, basename = ""):
    """Add all files from "pathname" to the ZIP archive.

    If pathname is a package directory, search the directory and
    all package subdirectories recursively for all *.py and enter
    the modules into the archive.  If pathname is a plain
    directory, listdir *.py and enter all modules.  Else, pathname
    must be a Python *.py file and the module will be put into the
    archive.  Added modules are always module.pyo or module.pyc.
    This method will compile the module.py into module.pyc if
    necessary.

    "basename" is the archive directory prefix under which the modules
    are stored.  Raises RuntimeError when pathname is a file that does
    not end with ".py".
    """
    def add_module(source_path, label):
        # Resolve the code file for one .py source (the ".py" suffix is
        # cut off before the lookup) and store it in the archive.
        fname, arcname = self._get_codename(source_path[0:-3], basename)
        if self.debug:
            print("%s %s" % (label, arcname))
        self.write(fname, arcname)

    if not os.path.isdir(pathname):
        if pathname[-3:] != ".py":
            raise RuntimeError(
                'Files added with writepy() must end with ".py"')
        add_module(pathname, "Adding file")
        return

    initname = os.path.join(pathname, "__init__.py")
    if os.path.isfile(initname):
        # This is a package directory, add it
        name = os.path.split(pathname)[1]
        if basename:
            basename = "%s/%s" % (basename, name)
        else:
            basename = name
        if self.debug:
            print("Adding package in %s as %s" % (pathname, basename))
        add_module(initname, "Adding")
        dirlist = os.listdir(pathname)
        dirlist.remove("__init__.py")
        # Add all *.py files and package subdirectories
        for filename in dirlist:
            path = os.path.join(pathname, filename)
            if os.path.isdir(path):
                if os.path.isfile(os.path.join(path, "__init__.py")):
                    # This is a package directory, add it
                    self.writepy(path, basename)  # Recursive call
            elif os.path.splitext(filename)[1] == ".py":
                add_module(path, "Adding")
    else:
        # This is NOT a package directory, add its files at top level
        if self.debug:
            print("Adding files from directory %s" % pathname)
        for filename in os.listdir(pathname):
            if os.path.splitext(filename)[1] == ".py":
                add_module(os.path.join(pathname, filename), "Adding")
1378
1379 def _get_codename(self, pathname, basename):
1380 """Return (filename, archivename) for the path.
1381
Fred Drake484d7352000-10-02 21:14:52 +00001382 Given a module name path, return the correct file path and
1383 archive name, compiling if necessary. For example, given
1384 /python/lib/string, return (/python/lib/string.pyc, string).
1385 """
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001386 file_py = pathname + ".py"
1387 file_pyc = pathname + ".pyc"
1388 file_pyo = pathname + ".pyo"
1389 if os.path.isfile(file_pyo) and \
Raymond Hettinger32200ae2002-06-01 19:51:15 +00001390 os.stat(file_pyo).st_mtime >= os.stat(file_py).st_mtime:
Tim Peterse1190062001-01-15 03:34:38 +00001391 fname = file_pyo # Use .pyo file
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001392 elif not os.path.isfile(file_pyc) or \
Raymond Hettinger32200ae2002-06-01 19:51:15 +00001393 os.stat(file_pyc).st_mtime < os.stat(file_py).st_mtime:
Fred Drake484d7352000-10-02 21:14:52 +00001394 import py_compile
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001395 if self.debug:
1396 print "Compiling", file_py
Martin v. Löwis0c6774d2003-01-15 11:51:06 +00001397 try:
1398 py_compile.compile(file_py, file_pyc, None, True)
1399 except py_compile.PyCompileError,err:
1400 print err.msg
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001401 fname = file_pyc
1402 else:
1403 fname = file_pyc
1404 archivename = os.path.split(fname)[1]
1405 if basename:
1406 archivename = "%s/%s" % (basename, archivename)
1407 return (fname, archivename)
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001408
1409
def main(args=None):
    """Command-line interface: list, test, extract or create a ZIP file.

    args is the argument list without the program name; when None,
    sys.argv[1:] is used.  The first argument selects the action:

        -l zipfile.zip          print a listing of the archive
        -t zipfile.zip          check every member of the archive
        -e zipfile.zip target   extract the archive into directory target
        -c zipfile.zip src ...  create the archive from files/directories

    On a missing or unknown action, or a wrong number of arguments, the
    usage text is printed and SystemExit(1) is raised.
    """
    import textwrap
    USAGE = textwrap.dedent("""\
        Usage:
            zipfile.py -l zipfile.zip        # Show listing of a zipfile
            zipfile.py -t zipfile.zip        # Test if a zipfile is valid
            zipfile.py -e zipfile.zip target # Extract zipfile into target dir
            zipfile.py -c zipfile.zip src ... # Create zipfile from sources
        """)
    if args is None:
        args = sys.argv[1:]

    if not args or args[0] not in ('-l', '-c', '-e', '-t'):
        print(USAGE)
        sys.exit(1)

    if args[0] == '-l':
        if len(args) != 2:
            print(USAGE)
            sys.exit(1)
        with ZipFile(args[1], 'r') as zf:
            zf.printdir()

    elif args[0] == '-t':
        if len(args) != 2:
            print(USAGE)
            sys.exit(1)
        with ZipFile(args[1], 'r') as zf:
            badfile = zf.testzip()
        if badfile:
            print("The following enclosed file is corrupted: {!r}".format(badfile))
        print("Done testing")

    elif args[0] == '-e':
        if len(args) != 3:
            print(USAGE)
            sys.exit(1)

        with ZipFile(args[1], 'r') as zf:
            # Delegate to the library's own extraction.  The previous
            # hand-written loop opened every member name for writing,
            # which failed on directory entries (names ending in "/").
            # NOTE(review): archives from untrusted sources should still
            # be inspected before extraction.
            zf.extractall(args[2])

    elif args[0] == '-c':
        if len(args) < 3:
            print(USAGE)
            sys.exit(1)

        def addToZip(zf, path, zippath):
            # Store regular files; walk directories recursively.
            if os.path.isfile(path):
                zf.write(path, zippath, ZIP_DEFLATED)
            elif os.path.isdir(path):
                for nm in os.listdir(path):
                    addToZip(zf,
                             os.path.join(path, nm), os.path.join(zippath, nm))
            # else: ignore

        with ZipFile(args[1], 'w', allowZip64=True) as zf:
            for src in args[2:]:
                addToZip(zf, src, os.path.basename(src))
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001479
# Script entry point: run the command-line interface when this module is
# executed directly rather than imported.
if __name__ == "__main__":
    main()