blob: ef19a28513714f0f082f414e5c81e4c3f759a433 [file] [log] [blame]
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001"""
2Read and write ZIP files.
3"""
Georg Brandl62416bc2008-01-07 18:47:44 +00004import struct, os, time, sys, shutil
Martin v. Löwis0dfcfc82009-01-24 14:00:33 +00005import binascii, cStringIO, stat
Antoine Pitrou94c33eb2010-01-27 20:59:50 +00006import io
7import re
Guido van Rossum32abe6f2000-03-31 17:30:02 +00008
9try:
Tim Peterse1190062001-01-15 03:34:38 +000010 import zlib # We may need its compression method
Gregory P. Smithb89a0962008-03-19 01:46:10 +000011 crc32 = zlib.crc32
Guido van Rossum9c673f32001-04-10 15:37:12 +000012except ImportError:
Guido van Rossum32abe6f2000-03-31 17:30:02 +000013 zlib = None
Gregory P. Smithb89a0962008-03-19 01:46:10 +000014 crc32 = binascii.crc32
Guido van Rossum32abe6f2000-03-31 17:30:02 +000015
Skip Montanaro40fc1602001-03-01 04:27:19 +000016__all__ = ["BadZipfile", "error", "ZIP_STORED", "ZIP_DEFLATED", "is_zipfile",
Ronald Oussoren143cefb2006-06-15 08:14:18 +000017 "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile" ]
Skip Montanaro40fc1602001-03-01 04:27:19 +000018
class BadZipfile(Exception):
    """Exception raised by this module for malformed or unsupported ZIP data."""
Ronald Oussoren143cefb2006-06-15 08:14:18 +000021
22
class LargeZipFile(Exception):
    """
    Raised when writing a zipfile that would require the ZIP64 extensions
    while those extensions are disabled.
    """
28
error = BadZipfile # The exception raised by this module

# Sizes above ZIP64_LIMIT make ZipInfo.FileHeader() emit a ZIP64 extra record.
ZIP64_LIMIT = (1 << 31) - 1
# NOTE(review): presumably the entry-count threshold for ZIP64; the code that
# consumes this constant is outside this part of the file -- confirm there.
ZIP_FILECOUNT_LIMIT = 1 << 16
# Largest value representable in the 16-bit comment-size field of the
# "end of central directory" record (the trailing 'H' of structEndArchive).
ZIP_MAX_COMMENT = (1 << 16) - 1

# constants for Zip file compression methods
ZIP_STORED = 0
ZIP_DEFLATED = 8
# Other ZIP compression methods not supported

# Below are some formats and associated data for reading/writing headers using
# the struct module. The names and structures of headers/records are those used
# in the PKWARE description of the ZIP file format:
# http://www.pkware.com/documents/casestudies/APPNOTE.TXT
# (URL valid as of January 2008)

# The "end of central directory" structure, magic number, size, and indices
# (section V.I in the format document)
# Layout: 4-byte signature, four unsigned shorts, two unsigned longs and one
# unsigned short, all little-endian -- eight fields, indexed 0..7 below.
structEndArchive = "<4s4H2LH"
stringEndArchive = "PK\005\006"
sizeEndCentDir = struct.calcsize(structEndArchive)

_ECD_SIGNATURE = 0
_ECD_DISK_NUMBER = 1
_ECD_DISK_START = 2
_ECD_ENTRIES_THIS_DISK = 3
_ECD_ENTRIES_TOTAL = 4
_ECD_SIZE = 5
_ECD_OFFSET = 6
_ECD_COMMENT_SIZE = 7
# These last two indices are not part of the structure as defined in the
# spec, but they are used internally by this module as a convenience
# (_EndRecData appends the comment and the record's file offset to the list).
_ECD_COMMENT = 8
_ECD_LOCATION = 9

# The "central directory" structure, magic number, size, and indices
# of entries in the structure (section V.F in the format document)
structCentralDir = "<4s4B4HL2L5H2L"
stringCentralDir = "PK\001\002"
sizeCentralDir = struct.calcsize(structCentralDir)

# indexes of entries in the central directory structure
_CD_SIGNATURE = 0
_CD_CREATE_VERSION = 1
_CD_CREATE_SYSTEM = 2
_CD_EXTRACT_VERSION = 3
_CD_EXTRACT_SYSTEM = 4
_CD_FLAG_BITS = 5
_CD_COMPRESS_TYPE = 6
_CD_TIME = 7
_CD_DATE = 8
_CD_CRC = 9
_CD_COMPRESSED_SIZE = 10
_CD_UNCOMPRESSED_SIZE = 11
_CD_FILENAME_LENGTH = 12
_CD_EXTRA_FIELD_LENGTH = 13
_CD_COMMENT_LENGTH = 14
_CD_DISK_NUMBER_START = 15
_CD_INTERNAL_FILE_ATTRIBUTES = 16
_CD_EXTERNAL_FILE_ATTRIBUTES = 17
_CD_LOCAL_HEADER_OFFSET = 18

# The "local file header" structure, magic number, size, and indices
# (section V.A in the format document)
# This is the format packed by ZipInfo.FileHeader().
structFileHeader = "<4s2B4HL2L2H"
stringFileHeader = "PK\003\004"
sizeFileHeader = struct.calcsize(structFileHeader)

_FH_SIGNATURE = 0
_FH_EXTRACT_VERSION = 1
_FH_EXTRACT_SYSTEM = 2
_FH_GENERAL_PURPOSE_FLAG_BITS = 3
_FH_COMPRESSION_METHOD = 4
_FH_LAST_MOD_TIME = 5
_FH_LAST_MOD_DATE = 6
_FH_CRC = 7
_FH_COMPRESSED_SIZE = 8
_FH_UNCOMPRESSED_SIZE = 9
_FH_FILENAME_LENGTH = 10
_FH_EXTRA_FIELD_LENGTH = 11

# The "Zip64 end of central directory locator" structure, magic number, and size
# Unpacked by _EndRecData64 as (signature, disk number, relative offset,
# number of disks).
structEndArchive64Locator = "<4sLQL"
stringEndArchive64Locator = "PK\x06\x07"
sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator)

# The "Zip64 end of central directory" record, magic number, size, and indices
# (section V.G in the format document)
structEndArchive64 = "<4sQ2H2L4Q"
stringEndArchive64 = "PK\x06\x06"
sizeEndCentDir64 = struct.calcsize(structEndArchive64)

_CD64_SIGNATURE = 0
_CD64_DIRECTORY_RECSIZE = 1
_CD64_CREATE_VERSION = 2
_CD64_EXTRACT_VERSION = 3
_CD64_DISK_NUMBER = 4
_CD64_DISK_NUMBER_START = 5
_CD64_NUMBER_ENTRIES_THIS_DISK = 6
_CD64_NUMBER_ENTRIES_TOTAL = 7
_CD64_DIRECTORY_SIZE = 8
_CD64_OFFSET_START_CENTDIR = 9
132
def _check_zipfile(fp):
    """Return True when an "end of central directory" record is found in *fp*.

    *fp* must be a seekable, readable file object.  An IOError raised while
    probing the file is treated as "not a ZIP file" and yields False.
    """
    try:
        # A truthy result means the magic number was located.
        found = bool(_EndRecData(fp))
    except IOError:
        found = False
    return found
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000140
def is_zipfile(filename):
    """Quickly see if a file is a ZIP file by checking the magic number.

    The filename argument may be a path, or an already-open file or
    file-like object (anything exposing a ``read`` attribute).  Returns
    False instead of raising when the file cannot be opened or read.
    """
    try:
        if not hasattr(filename, "read"):
            # A path was given: open it ourselves and close it when done.
            with open(filename, "rb") as handle:
                return _check_zipfile(handle)
        # A file-like object was given: probe it directly.
        return _check_zipfile(fp=filename)
    except IOError:
        return False
156
def _EndRecData64(fpin, offset, endrec):
    """
    Read the ZIP64 end-of-archive records and use that to update endrec

    fpin   -- seekable file object positioned anywhere.
    offset -- position of the classic "end of central directory" record,
              expressed relative to the end of the file (zero or negative).
    endrec -- list built by _EndRecData; updated in place and returned.

    If no valid ZIP64 locator/record pair precedes the classic record, endrec
    is returned unchanged.  Raises BadZipfile for multi-disk archives.
    """
    try:
        fpin.seek(offset - sizeEndCentDir64Locator, 2)
    except IOError:
        # If the seek fails, the file is not large enough to contain a ZIP64
        # end-of-archive locator, so just return the end record we were given.
        return endrec

    data = fpin.read(sizeEndCentDir64Locator)
    if len(data) != sizeEndCentDir64Locator:
        # Short read: there cannot be a complete locator here.
        return endrec
    sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, data)
    if sig != stringEndArchive64Locator:
        return endrec

    if diskno != 0 or disks != 1:
        raise BadZipfile("zipfiles that span multiple disks are not supported")

    # Assume no 'zip64 extensible data'
    try:
        fpin.seek(offset - sizeEndCentDir64Locator - sizeEndCentDir64, 2)
    except IOError:
        # Not enough room in front of the locator for the ZIP64 record.
        return endrec
    data = fpin.read(sizeEndCentDir64)
    if len(data) != sizeEndCentDir64:
        return endrec
    sig, sz, create_version, read_version, disk_num, disk_dir, \
            dircount, dircount2, dirsize, diroffset = \
            struct.unpack(structEndArchive64, data)
    if sig != stringEndArchive64:
        return endrec

    # Update the original endrec using data from the ZIP64 record
    endrec[_ECD_SIGNATURE] = sig
    endrec[_ECD_DISK_NUMBER] = disk_num
    endrec[_ECD_DISK_START] = disk_dir
    endrec[_ECD_ENTRIES_THIS_DISK] = dircount
    endrec[_ECD_ENTRIES_TOTAL] = dircount2
    endrec[_ECD_SIZE] = dirsize
    endrec[_ECD_OFFSET] = diroffset
    return endrec
188
189
def _EndRecData(fpin):
    """Return data from the "End of Central Directory" record, or None.

    The data is a list of the nine items in the ZIP "End of central dir"
    record followed by a tenth item, the file seek offset of this record.

    fpin must be a seekable, readable file object.  None is returned when no
    complete, self-consistent record can be located."""

    # Determine file size
    fpin.seek(0, 2)
    filesize = fpin.tell()

    # Check to see if this is ZIP file with no archive comment (the
    # "end of central directory" structure should be the last item in the
    # file if this is the case).
    try:
        fpin.seek(-sizeEndCentDir, 2)
    except IOError:
        return None
    data = fpin.read()
    # The length test protects struct.unpack from file objects that do not
    # raise when asked to seek before the start of the file.
    if (len(data) == sizeEndCentDir and
            data[0:4] == stringEndArchive and
            data[-2:] == "\000\000"):
        # the signature is correct and there's no comment, unpack structure
        endrec = list(struct.unpack(structEndArchive, data))

        # Append a blank comment and record start offset
        endrec.append("")
        endrec.append(filesize - sizeEndCentDir)

        # Try to read the "Zip64 end of central directory" structure
        return _EndRecData64(fpin, -sizeEndCentDir, endrec)

    # Either this is not a ZIP file, or it is a ZIP file with an archive
    # comment.  Search the end of the file for the "end of central directory"
    # record signature. The comment is the last item in the ZIP file and may be
    # up to 64K long.  It is assumed that the "end of central directory" magic
    # number does not appear in the comment.
    maxCommentStart = max(filesize - (1 << 16) - sizeEndCentDir, 0)
    fpin.seek(maxCommentStart, 0)
    data = fpin.read()
    start = data.rfind(stringEndArchive)
    if start >= 0:
        # found the magic number; attempt to unpack and interpret
        recData = data[start:start+sizeEndCentDir]
        if len(recData) != sizeEndCentDir:
            # The signature sits too close to the end of the file for a whole
            # record to follow it: the file is truncated or not a ZIP file.
            return None
        endrec = list(struct.unpack(structEndArchive, recData))
        comment = data[start+sizeEndCentDir:]
        # check that comment length is correct
        if endrec[_ECD_COMMENT_SIZE] == len(comment):
            # Append the archive comment and start offset
            endrec.append(comment)
            endrec.append(maxCommentStart + start)

            # Try to read the "Zip64 end of central directory" structure
            return _EndRecData64(fpin, maxCommentStart + start - filesize,
                                 endrec)

    # Unable to find a valid end of central directory structure
    return None
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000246
Fred Drake484d7352000-10-02 21:14:52 +0000247
class ZipInfo(object):
    """Class with attributes describing each file in the ZIP archive."""

    __slots__ = (
            'orig_filename',
            'filename',
            'date_time',
            'compress_type',
            'comment',
            'extra',
            'create_system',
            'create_version',
            'extract_version',
            'reserved',
            'flag_bits',
            'volume',
            'internal_attr',
            'external_attr',
            'header_offset',
            'CRC',
            'compress_size',
            'file_size',
            '_raw_time',
        )

    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
        """Create the description of one archive member.

        filename  -- member name; cut at the first NUL byte and normalized
                     to use "/" as the directory separator.
        date_time -- (year, month, day, hour, min, sec) tuple.
        """
        self.orig_filename = filename   # Original file name in archive

        # Terminate the file name at the first null byte.  Null bytes in file
        # names are used as tricks by viruses in archives.
        null_byte = filename.find(chr(0))
        if null_byte >= 0:
            filename = filename[0:null_byte]
        # This is used to ensure paths in generated ZIP files always use
        # forward slashes as the directory separator, as required by the
        # ZIP format specification.
        if os.sep != "/" and os.sep in filename:
            filename = filename.replace(os.sep, "/")

        self.filename = filename        # Normalized file name
        self.date_time = date_time      # year, month, day, hour, min, sec
        # Standard values:
        self.compress_type = ZIP_STORED # Type of compression for the file
        self.comment = ""               # Comment for each file
        self.extra = ""                 # ZIP extra data
        if sys.platform == 'win32':
            self.create_system = 0          # System which created ZIP archive
        else:
            # Assume everything else is unix-y
            self.create_system = 3          # System which created ZIP archive
        self.create_version = 20        # Version which created ZIP archive
        self.extract_version = 20       # Version needed to extract archive
        self.reserved = 0               # Must be zero
        self.flag_bits = 0              # ZIP flag bits
        self.volume = 0                 # Volume number of file header
        self.internal_attr = 0          # Internal attributes
        self.external_attr = 0          # External file attributes
        # Other attributes are set by class ZipFile:
        # header_offset         Byte offset to the file header
        # CRC                   CRC-32 of the uncompressed file
        # compress_size         Size of the compressed file
        # file_size             Size of the uncompressed file

    def FileHeader(self):
        """Return the per-file header as a string.

        The result is the packed local file header followed by the encoded
        file name and the extra data.  When either size exceeds ZIP64_LIMIT a
        ZIP64 extra record is appended and both version fields are raised to
        at least 45.
        """
        dt = self.date_time
        # Pack the timestamp into the 16-bit MS-DOS date and time fields
        # (seconds are stored with two-second resolution).
        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
        if self.flag_bits & 0x08:
            # Set these to zero because we write them after the file data
            CRC = compress_size = file_size = 0
        else:
            CRC = self.CRC
            compress_size = self.compress_size
            file_size = self.file_size

        extra = self.extra

        if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
            # File is larger than what fits into a 4 byte integer,
            # fall back to the ZIP64 extension
            fmt = '<HHQQ'
            extra = extra + struct.pack(fmt,
                    1, struct.calcsize(fmt)-4, file_size, compress_size)
            file_size = 0xffffffff
            compress_size = 0xffffffff
            self.extract_version = max(45, self.extract_version)
            # Raise create_version from its own value, not extract_version's,
            # so an already higher create_version is never lowered.
            self.create_version = max(45, self.create_version)

        filename, flag_bits = self._encodeFilenameFlags()
        header = struct.pack(structFileHeader, stringFileHeader,
                 self.extract_version, self.reserved, flag_bits,
                 self.compress_type, dostime, dosdate, CRC,
                 compress_size, file_size,
                 len(filename), len(extra))
        return header + filename + extra

    def _encodeFilenameFlags(self):
        """Return (encoded filename, flag bits) for writing a header.

        Unicode names are written as ASCII when possible; otherwise as UTF-8
        with flag bit 0x800 set.  Byte-string names are passed through.
        """
        if isinstance(self.filename, unicode):
            try:
                return self.filename.encode('ascii'), self.flag_bits
            except UnicodeEncodeError:
                return self.filename.encode('utf-8'), self.flag_bits | 0x800
        else:
            return self.filename, self.flag_bits

    def _decodeFilename(self):
        """Return the file name, decoded from UTF-8 if flag bit 0x800 is set."""
        if self.flag_bits & 0x800:
            return self.filename.decode('utf-8')
        else:
            return self.filename

    def _decodeExtra(self):
        """Apply any ZIP64 record found in self.extra to this object.

        Walks the (type, length, payload) records of the extra field.  For a
        record of type 1, the 64-bit values replace file_size, compress_size
        and header_offset, in that order, for each of those attributes that
        currently holds the 0xffffffff placeholder.

        Raises RuntimeError when a type-1 record has an unexpected length.
        """
        # Try to decode the extra field.
        extra = self.extra
        unpack = struct.unpack
        # A record header needs four bytes; stop on shorter trailing data
        # instead of letting struct.unpack fail on it.
        while len(extra) >= 4:
            tp, ln = unpack('<HH', extra[:4])
            if tp == 1:
                if ln >= 24:
                    counts = unpack('<QQQ', extra[4:28])
                elif ln == 16:
                    counts = unpack('<QQ', extra[4:20])
                elif ln == 8:
                    counts = unpack('<Q', extra[4:12])
                elif ln == 0:
                    counts = ()
                else:
                    raise RuntimeError("Corrupt extra field %s" % (ln,))

                idx = 0

                # ZIP64 extension (large files and/or large archives)
                if self.file_size in (0xffffffffffffffff, 0xffffffff):
                    self.file_size = counts[idx]
                    idx += 1

                if self.compress_size == 0xFFFFFFFF:
                    self.compress_size = counts[idx]
                    idx += 1

                if self.header_offset == 0xffffffff:
                    self.header_offset = counts[idx]
                    idx += 1

            extra = extra[ln+4:]
Tim Petersa608bb22006-06-15 18:06:29 +0000395
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000396
Martin v. Löwisc6d626e2007-02-13 09:49:38 +0000397class _ZipDecrypter:
398 """Class to handle decryption of files stored within a ZIP archive.
399
400 ZIP supports a password-based form of encryption. Even though known
401 plaintext attacks have been found against it, it is still useful
Gregory P. Smithda407232008-01-20 01:32:00 +0000402 to be able to get data out of such a file.
Martin v. Löwisc6d626e2007-02-13 09:49:38 +0000403
404 Usage:
405 zd = _ZipDecrypter(mypwd)
406 plain_char = zd(cypher_char)
407 plain_text = map(zd, cypher_text)
408 """
409
410 def _GenerateCRCTable():
411 """Generate a CRC-32 table.
412
413 ZIP encryption uses the CRC32 one-byte primitive for scrambling some
414 internal keys. We noticed that a direct implementation is faster than
415 relying on binascii.crc32().
416 """
417 poly = 0xedb88320
418 table = [0] * 256
419 for i in range(256):
420 crc = i
421 for j in range(8):
422 if crc & 1:
423 crc = ((crc >> 1) & 0x7FFFFFFF) ^ poly
424 else:
425 crc = ((crc >> 1) & 0x7FFFFFFF)
426 table[i] = crc
427 return table
428 crctable = _GenerateCRCTable()
429
430 def _crc32(self, ch, crc):
431 """Compute the CRC32 primitive on one byte."""
432 return ((crc >> 8) & 0xffffff) ^ self.crctable[(crc ^ ord(ch)) & 0xff]
433
434 def __init__(self, pwd):
435 self.key0 = 305419896
436 self.key1 = 591751049
437 self.key2 = 878082192
438 for p in pwd:
439 self._UpdateKeys(p)
440
441 def _UpdateKeys(self, c):
442 self.key0 = self._crc32(c, self.key0)
443 self.key1 = (self.key1 + (self.key0 & 255)) & 4294967295
444 self.key1 = (self.key1 * 134775813 + 1) & 4294967295
445 self.key2 = self._crc32(chr((self.key1 >> 24) & 255), self.key2)
446
447 def __call__(self, c):
448 """Decrypt a single character."""
449 c = ord(c)
450 k = self.key2 | 2
451 c = c ^ (((k * (k^1)) >> 8) & 255)
452 c = chr(c)
453 self._UpdateKeys(c)
454 return c
455
class ZipExtFile(io.BufferedIOBase):
    """File-like object for reading an archive member.
       Is returned by ZipFile.open().
    """

    # Max size supported by decompressor.
    # Parenthesized on purpose: "1 << 31 - 1" parses as "1 << 30".
    MAX_N = (1 << 31) - 1

    # Read from compressed files in 4k blocks.
    MIN_READ_SIZE = 4096

    # Search for universal newlines or line chunks.
    PATTERN = re.compile(r'^(?P<chunk>[^\r\n]+)|(?P<newline>\n|\r\n?)')

    def __init__(self, fileobj, mode, zipinfo, decrypter=None):
        """Wrap fileobj, positioned at the start of the member's data.

        fileobj   -- file object the raw (possibly encrypted) data is read from.
        mode      -- open mode; a 'U' in it enables universal newlines.
        zipinfo   -- object providing compress_type, compress_size, filename
                     and, optionally, CRC for the member.
        decrypter -- optional callable applied to every raw character.
        """
        self._fileobj = fileobj
        self._decrypter = decrypter

        self._compress_type = zipinfo.compress_type
        self._compress_size = zipinfo.compress_size
        self._compress_left = zipinfo.compress_size

        if self._compress_type == ZIP_DEFLATED:
            self._decompressor = zlib.decompressobj(-15)
        self._unconsumed = ''

        self._readbuffer = ''
        self._offset = 0

        self._universal = 'U' in mode
        self.newlines = None

        # Adjust read size for encrypted files since the first 12 bytes
        # are for the encryption/password information.
        if self._decrypter is not None:
            self._compress_left -= 12

        self.mode = mode
        self.name = zipinfo.filename

        if hasattr(zipinfo, 'CRC'):
            self._expected_crc = zipinfo.CRC
            self._running_crc = crc32(b'') & 0xffffffff
        else:
            self._expected_crc = None

    def readline(self, limit=-1):
        """Read and return a line from the stream.

        If limit is specified, at most limit bytes will be read.
        """

        if not self._universal and limit < 0:
            # Shortcut common case - newline found in buffer.
            i = self._readbuffer.find('\n', self._offset) + 1
            if i > 0:
                line = self._readbuffer[self._offset: i]
                self._offset = i
                return line

        if not self._universal:
            return io.BufferedIOBase.readline(self, limit)

        line = ''
        while limit < 0 or len(line) < limit:
            readahead = self.peek(2)
            if readahead == '':
                return line

            #
            # Search for universal newlines or line chunks.
            #
            # The pattern returns either a line chunk or a newline, but not
            # both. Combined with peek(2), we are assured that the sequence
            # '\r\n' is always retrieved completely and never split into
            # separate newlines - '\r', '\n' due to coincidental readaheads.
            #
            match = self.PATTERN.search(readahead)
            newline = match.group('newline')
            if newline is not None:
                if self.newlines is None:
                    self.newlines = []
                if newline not in self.newlines:
                    self.newlines.append(newline)
                self._offset += len(newline)
                return line + '\n'

            chunk = match.group('chunk')
            if limit >= 0:
                chunk = chunk[: limit - len(line)]

            self._offset += len(chunk)
            line += chunk

        return line

    def peek(self, n=1):
        """Returns buffered bytes without advancing the position."""
        if n > len(self._readbuffer) - self._offset:
            chunk = self.read(n)
            if len(chunk) > self._offset:
                # read() rebuilt the buffer and dropped part of what it
                # returned; put the chunk back in front of what is left
                # rather than rewinding to a negative offset.
                self._readbuffer = chunk + self._readbuffer[self._offset:]
                self._offset = 0
            else:
                self._offset -= len(chunk)

        # Return up to 512 bytes to reduce allocation overhead for tight loops.
        return self._readbuffer[self._offset: self._offset + 512]

    def readable(self):
        """Return True: this object supports reading."""
        return True

    def read(self, n=-1):
        """Read and return up to n bytes.
        If the argument is omitted, None, or negative, data is read and returned until EOF is reached.
        """

        buf = ''
        if n is None:
            n = -1
        while True:
            if n < 0:
                data = self.read1(n)
            elif n > len(buf):
                # Only ask for what is still missing so that the result
                # never grows beyond n bytes.
                data = self.read1(n - len(buf))
            else:
                return buf
            if len(data) == 0:
                return buf
            buf += data

    def _update_crc(self, newdata, eof):
        """Fold newdata into the running CRC; verify it when eof is true.

        Raises BadZipfile if the final CRC differs from the expected one.
        """
        # Update the CRC using the given data.
        if self._expected_crc is None:
            # No need to compute the CRC if we don't have a reference value
            return
        self._running_crc = crc32(newdata, self._running_crc) & 0xffffffff
        # Check the CRC if we're at the end of the file
        if eof and self._running_crc != self._expected_crc:
            raise BadZipfile("Bad CRC-32 for file %r" % self.name)

    def read1(self, n):
        """Read up to n bytes with at most one read() system call."""

        # Simplify algorithm (branching) by transforming negative n to large n.
        # None is tested first so that it is never compared with an integer.
        if n is None or n < 0:
            n = self.MAX_N

        # Bytes available in read buffer.
        len_readbuffer = len(self._readbuffer) - self._offset

        # Read from file.
        if self._compress_left > 0 and n > len_readbuffer + len(self._unconsumed):
            nbytes = n - len_readbuffer - len(self._unconsumed)
            nbytes = max(nbytes, self.MIN_READ_SIZE)
            nbytes = min(nbytes, self._compress_left)

            data = self._fileobj.read(nbytes)
            self._compress_left -= len(data)

            if data and self._decrypter is not None:
                data = ''.join(map(self._decrypter, data))

            if self._compress_type == ZIP_STORED:
                self._update_crc(data, eof=(self._compress_left==0))
                self._readbuffer = self._readbuffer[self._offset:] + data
                self._offset = 0
            else:
                # Prepare deflated bytes for decompression.
                self._unconsumed += data

        # Handle unconsumed data.
        if (len(self._unconsumed) > 0 and n > len_readbuffer and
            self._compress_type == ZIP_DEFLATED):
            data = self._decompressor.decompress(
                self._unconsumed,
                max(n - len_readbuffer, self.MIN_READ_SIZE)
            )

            self._unconsumed = self._decompressor.unconsumed_tail
            eof = len(self._unconsumed) == 0 and self._compress_left == 0
            if eof:
                data += self._decompressor.flush()

            self._update_crc(data, eof=eof)
            self._readbuffer = self._readbuffer[self._offset:] + data
            self._offset = 0

        # Read from buffer.
        data = self._readbuffer[self._offset: self._offset + n]
        self._offset += len(data)
        return data
640
Tim Petersea5962f2007-03-12 18:07:52 +0000641
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000642
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000643class ZipFile:
Tim Petersa19a1682001-03-29 04:36:09 +0000644 """ Class with methods to open, read, write, close, list zip files.
645
Martin v. Löwis8c436412008-07-03 12:51:14 +0000646 z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=False)
Tim Petersa19a1682001-03-29 04:36:09 +0000647
Fred Drake3d9091e2001-03-26 15:49:24 +0000648 file: Either the path to the file, or a file-like object.
649 If it is a path, the file will be opened and closed by ZipFile.
650 mode: The mode can be either read "r", write "w" or append "a".
651 compression: ZIP_STORED (no compression) or ZIP_DEFLATED (requires zlib).
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000652 allowZip64: if True ZipFile will create files with ZIP64 extensions when
653 needed, otherwise it will raise an exception when this would
654 be necessary.
655
Fred Drake3d9091e2001-03-26 15:49:24 +0000656 """
Fred Drake484d7352000-10-02 21:14:52 +0000657
Fred Drake90eac282001-02-28 05:29:34 +0000658 fp = None # Set here since __del__ checks it
659
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000660 def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=False):
Fred Drake484d7352000-10-02 21:14:52 +0000661 """Open the ZIP file with mode read "r", write "w" or append "a"."""
Georg Brandl4b3ab6f2007-07-12 09:59:22 +0000662 if mode not in ("r", "w", "a"):
663 raise RuntimeError('ZipFile() requires mode "r", "w", or "a"')
664
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000665 if compression == ZIP_STORED:
666 pass
667 elif compression == ZIP_DEFLATED:
668 if not zlib:
669 raise RuntimeError,\
Fred Drake5db246d2000-09-29 20:44:48 +0000670 "Compression requires the (missing) zlib module"
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000671 else:
672 raise RuntimeError, "That compression method is not supported"
Georg Brandl4b3ab6f2007-07-12 09:59:22 +0000673
674 self._allowZip64 = allowZip64
675 self._didModify = False
Tim Peterse1190062001-01-15 03:34:38 +0000676 self.debug = 0 # Level of printing: 0 through 3
677 self.NameToInfo = {} # Find file info given name
678 self.filelist = [] # List of ZipInfo instances for archive
679 self.compression = compression # Method of compression
Raymond Hettinger2ca7c192005-02-16 09:27:49 +0000680 self.mode = key = mode.replace('b', '')[0]
Martin v. Löwisc6d626e2007-02-13 09:49:38 +0000681 self.pwd = None
Martin v. Löwis8c436412008-07-03 12:51:14 +0000682 self.comment = ''
Tim Petersa19a1682001-03-29 04:36:09 +0000683
Fred Drake3d9091e2001-03-26 15:49:24 +0000684 # Check if we were passed a file-like object
Walter Dörwald65230a22002-06-03 15:58:32 +0000685 if isinstance(file, basestring):
Fred Drake3d9091e2001-03-26 15:49:24 +0000686 self._filePassed = 0
687 self.filename = file
688 modeDict = {'r' : 'rb', 'w': 'wb', 'a' : 'r+b'}
Martin v. Löwis84f6de92007-02-13 10:10:39 +0000689 try:
690 self.fp = open(file, modeDict[mode])
691 except IOError:
692 if mode == 'a':
693 mode = key = 'w'
694 self.fp = open(file, modeDict[mode])
695 else:
696 raise
Fred Drake3d9091e2001-03-26 15:49:24 +0000697 else:
698 self._filePassed = 1
699 self.fp = file
700 self.filename = getattr(file, 'name', None)
Tim Petersa19a1682001-03-29 04:36:09 +0000701
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000702 if key == 'r':
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000703 self._GetContents()
704 elif key == 'w':
Fred Drake3d9091e2001-03-26 15:49:24 +0000705 pass
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000706 elif key == 'a':
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000707 try: # See if file is a zip file
708 self._RealGetContents()
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000709 # seek to start of directory and overwrite
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000710 self.fp.seek(self.start_dir, 0)
711 except BadZipfile: # file is not a zip file, just append
712 self.fp.seek(0, 2)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000713 else:
Tim Peters7d3bad62001-04-04 18:56:49 +0000714 if not self._filePassed:
715 self.fp.close()
716 self.fp = None
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000717 raise RuntimeError, 'Mode must be "r", "w" or "a"'
718
Ezio Melotti569e61f2009-12-30 06:14:51 +0000719 def __enter__(self):
720 return self
721
722 def __exit__(self, type, value, traceback):
723 self.close()
724
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000725 def _GetContents(self):
Tim Peters7d3bad62001-04-04 18:56:49 +0000726 """Read the directory, making sure we close the file if the format
727 is bad."""
728 try:
729 self._RealGetContents()
730 except BadZipfile:
731 if not self._filePassed:
732 self.fp.close()
733 self.fp = None
734 raise
735
736 def _RealGetContents(self):
Fred Drake484d7352000-10-02 21:14:52 +0000737 """Read in the table of contents for the ZIP file."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000738 fp = self.fp
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000739 endrec = _EndRecData(fp)
740 if not endrec:
741 raise BadZipfile, "File is not a zip file"
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000742 if self.debug > 1:
743 print endrec
Martin v. Löwis8c436412008-07-03 12:51:14 +0000744 size_cd = endrec[_ECD_SIZE] # bytes in central directory
745 offset_cd = endrec[_ECD_OFFSET] # offset of central directory
746 self.comment = endrec[_ECD_COMMENT] # archive comment
747
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000748 # "concat" is zero, unless zip was concatenated to another file
Martin v. Löwis8c436412008-07-03 12:51:14 +0000749 concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
Antoine Pitrouebcd0ce2008-09-05 23:30:23 +0000750 if endrec[_ECD_SIGNATURE] == stringEndArchive64:
751 # If Zip64 extension structures are present, account for them
Martin v. Löwis8c436412008-07-03 12:51:14 +0000752 concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)
753
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000754 if self.debug > 2:
Martin v. Löwis8c436412008-07-03 12:51:14 +0000755 inferred = concat + offset_cd
756 print "given, inferred, offset", offset_cd, inferred, concat
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000757 # self.start_dir: Position of start of central directory
758 self.start_dir = offset_cd + concat
759 fp.seek(self.start_dir, 0)
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000760 data = fp.read(size_cd)
761 fp = cStringIO.StringIO(data)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000762 total = 0
763 while total < size_cd:
Martin v. Löwis8c436412008-07-03 12:51:14 +0000764 centdir = fp.read(sizeCentralDir)
Amaury Forgeot d'Arcae6d2b92008-07-11 21:28:25 +0000765 if centdir[0:4] != stringCentralDir:
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000766 raise BadZipfile, "Bad magic number for central directory"
767 centdir = struct.unpack(structCentralDir, centdir)
768 if self.debug > 2:
769 print centdir
Fred Drake3e038e52001-02-28 17:56:26 +0000770 filename = fp.read(centdir[_CD_FILENAME_LENGTH])
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000771 # Create ZipInfo instance to store file information
772 x = ZipInfo(filename)
Fred Drake3e038e52001-02-28 17:56:26 +0000773 x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
774 x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000775 x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000776 (x.create_version, x.create_system, x.extract_version, x.reserved,
777 x.flag_bits, x.compress_type, t, d,
778 x.CRC, x.compress_size, x.file_size) = centdir[1:12]
779 x.volume, x.internal_attr, x.external_attr = centdir[15:18]
780 # Convert date/time code to (year, month, day, hour, min, sec)
Gregory P. Smith0c63fc22008-01-20 01:21:03 +0000781 x._raw_time = t
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000782 x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
Fred Drake414ca662000-06-13 18:49:53 +0000783 t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000784
785 x._decodeExtra()
786 x.header_offset = x.header_offset + concat
Martin v. Löwis471617d2008-05-05 17:16:58 +0000787 x.filename = x._decodeFilename()
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000788 self.filelist.append(x)
789 self.NameToInfo[x.filename] = x
Martin v. Löwis8c436412008-07-03 12:51:14 +0000790
791 # update total bytes read from central directory
792 total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH]
793 + centdir[_CD_EXTRA_FIELD_LENGTH]
794 + centdir[_CD_COMMENT_LENGTH])
795
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000796 if self.debug > 2:
797 print "total", total
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000798
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000799
800 def namelist(self):
Fred Drake484d7352000-10-02 21:14:52 +0000801 """Return a list of file names in the archive."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000802 l = []
803 for data in self.filelist:
804 l.append(data.filename)
805 return l
806
807 def infolist(self):
Fred Drake484d7352000-10-02 21:14:52 +0000808 """Return a list of class ZipInfo instances for files in the
809 archive."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000810 return self.filelist
811
812 def printdir(self):
Fred Drake484d7352000-10-02 21:14:52 +0000813 """Print a table of contents for the zip file."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000814 print "%-46s %19s %12s" % ("File Name", "Modified ", "Size")
815 for zinfo in self.filelist:
Raymond Hettinger351e1a32008-01-14 22:58:05 +0000816 date = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time[:6]
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000817 print "%-46s %s %12d" % (zinfo.filename, date, zinfo.file_size)
818
819 def testzip(self):
Fred Drake484d7352000-10-02 21:14:52 +0000820 """Read all the files and check the CRC."""
Antoine Pitrouc5342702008-08-17 13:06:29 +0000821 chunk_size = 2 ** 20
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000822 for zinfo in self.filelist:
823 try:
Antoine Pitrouc5342702008-08-17 13:06:29 +0000824 # Read by chunks, to avoid an OverflowError or a
825 # MemoryError with very large embedded files.
826 f = self.open(zinfo.filename, "r")
827 while f.read(chunk_size): # Check CRC-32
828 pass
Raymond Hettingerc0fac962003-06-27 22:25:03 +0000829 except BadZipfile:
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000830 return zinfo.filename
831
832 def getinfo(self, name):
Fred Drake484d7352000-10-02 21:14:52 +0000833 """Return the instance of ZipInfo given 'name'."""
Georg Brandl4b3ab6f2007-07-12 09:59:22 +0000834 info = self.NameToInfo.get(name)
835 if info is None:
836 raise KeyError(
837 'There is no item named %r in the archive' % name)
838
839 return info
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000840
Martin v. Löwisc6d626e2007-02-13 09:49:38 +0000841 def setpassword(self, pwd):
842 """Set default password for encrypted files."""
843 self.pwd = pwd
844
845 def read(self, name, pwd=None):
Fred Drake484d7352000-10-02 21:14:52 +0000846 """Return file bytes (as a string) for name."""
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000847 return self.open(name, "r", pwd).read()
848
849 def open(self, name, mode="r", pwd=None):
850 """Return file-like object for 'name'."""
851 if mode not in ("r", "U", "rU"):
852 raise RuntimeError, 'open() requires mode "r", "U", or "rU"'
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000853 if not self.fp:
854 raise RuntimeError, \
Fred Drake5db246d2000-09-29 20:44:48 +0000855 "Attempt to read ZIP archive that was already closed"
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000856
Tim Petersea5962f2007-03-12 18:07:52 +0000857 # Only open a new file for instances where we were not
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000858 # given a file object in the constructor
859 if self._filePassed:
860 zef_file = self.fp
861 else:
862 zef_file = open(self.filename, 'rb')
863
Georg Brandl112aa502008-05-20 08:25:48 +0000864 # Make sure we have an info object
865 if isinstance(name, ZipInfo):
866 # 'name' is already an info object
867 zinfo = name
868 else:
869 # Get info object for name
870 zinfo = self.getinfo(name)
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000871
872 zef_file.seek(zinfo.header_offset, 0)
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000873
874 # Skip the file header:
Martin v. Löwis8c436412008-07-03 12:51:14 +0000875 fheader = zef_file.read(sizeFileHeader)
Amaury Forgeot d'Arcae6d2b92008-07-11 21:28:25 +0000876 if fheader[0:4] != stringFileHeader:
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000877 raise BadZipfile, "Bad magic number for file header"
878
879 fheader = struct.unpack(structFileHeader, fheader)
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000880 fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000881 if fheader[_FH_EXTRA_FIELD_LENGTH]:
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000882 zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000883
884 if fname != zinfo.orig_filename:
885 raise BadZipfile, \
886 'File name in directory "%s" and header "%s" differ.' % (
887 zinfo.orig_filename, fname)
888
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000889 # check for encrypted flag & handle password
890 is_encrypted = zinfo.flag_bits & 0x1
891 zd = None
Martin v. Löwisc6d626e2007-02-13 09:49:38 +0000892 if is_encrypted:
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000893 if not pwd:
894 pwd = self.pwd
895 if not pwd:
896 raise RuntimeError, "File %s is encrypted, " \
897 "password required for extraction" % name
898
Martin v. Löwisc6d626e2007-02-13 09:49:38 +0000899 zd = _ZipDecrypter(pwd)
900 # The first 12 bytes in the cypher stream is an encryption header
901 # used to strengthen the algorithm. The first 11 bytes are
902 # completely random, while the 12th contains the MSB of the CRC,
Gregory P. Smith0c63fc22008-01-20 01:21:03 +0000903 # or the MSB of the file time depending on the header type
Martin v. Löwisc6d626e2007-02-13 09:49:38 +0000904 # and is used to check the correctness of the password.
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000905 bytes = zef_file.read(12)
Martin v. Löwisc6d626e2007-02-13 09:49:38 +0000906 h = map(zd, bytes[0:12])
Gregory P. Smith0c63fc22008-01-20 01:21:03 +0000907 if zinfo.flag_bits & 0x8:
908 # compare against the file type from extended local headers
909 check_byte = (zinfo._raw_time >> 8) & 0xff
910 else:
911 # compare against the CRC otherwise
912 check_byte = (zinfo.CRC >> 24) & 0xff
913 if ord(h[11]) != check_byte:
914 raise RuntimeError("Bad password for file", name)
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000915
Antoine Pitrou94c33eb2010-01-27 20:59:50 +0000916 return ZipExtFile(zef_file, mode, zinfo, zd)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000917
Georg Brandl62416bc2008-01-07 18:47:44 +0000918 def extract(self, member, path=None, pwd=None):
919 """Extract a member from the archive to the current working directory,
920 using its full name. Its file information is extracted as accurately
921 as possible. `member' may be a filename or a ZipInfo object. You can
922 specify a different directory using `path'.
923 """
924 if not isinstance(member, ZipInfo):
925 member = self.getinfo(member)
926
927 if path is None:
928 path = os.getcwd()
929
930 return self._extract_member(member, path, pwd)
931
932 def extractall(self, path=None, members=None, pwd=None):
933 """Extract all members from the archive to the current working
934 directory. `path' specifies a different directory to extract to.
935 `members' is optional and must be a subset of the list returned
936 by namelist().
937 """
938 if members is None:
939 members = self.namelist()
940
941 for zipinfo in members:
942 self.extract(zipinfo, path, pwd)
943
944 def _extract_member(self, member, targetpath, pwd):
945 """Extract the ZipInfo object 'member' to a physical
946 file on the path targetpath.
947 """
948 # build the destination pathname, replacing
949 # forward slashes to platform specific separators.
Antoine Pitrou97377bf2009-05-04 21:17:17 +0000950 # Strip trailing path separator, unless it represents the root.
951 if (targetpath[-1:] in (os.path.sep, os.path.altsep)
952 and len(os.path.splitdrive(targetpath)[1]) > 1):
Georg Brandl62416bc2008-01-07 18:47:44 +0000953 targetpath = targetpath[:-1]
954
955 # don't include leading "/" from file name if present
Martin v. Löwis0dfcfc82009-01-24 14:00:33 +0000956 if member.filename[0] == '/':
Georg Brandl62416bc2008-01-07 18:47:44 +0000957 targetpath = os.path.join(targetpath, member.filename[1:])
958 else:
959 targetpath = os.path.join(targetpath, member.filename)
960
961 targetpath = os.path.normpath(targetpath)
962
963 # Create all upper directories if necessary.
964 upperdirs = os.path.dirname(targetpath)
965 if upperdirs and not os.path.exists(upperdirs):
966 os.makedirs(upperdirs)
967
Martin v. Löwis0dfcfc82009-01-24 14:00:33 +0000968 if member.filename[-1] == '/':
Martin v. Löwis0b09c422009-05-24 19:30:52 +0000969 if not os.path.isdir(targetpath):
970 os.mkdir(targetpath)
Martin v. Löwis0dfcfc82009-01-24 14:00:33 +0000971 return targetpath
972
Georg Brandl112aa502008-05-20 08:25:48 +0000973 source = self.open(member, pwd=pwd)
Georg Brandl62416bc2008-01-07 18:47:44 +0000974 target = file(targetpath, "wb")
975 shutil.copyfileobj(source, target)
976 source.close()
977 target.close()
978
979 return targetpath
980
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000981 def _writecheck(self, zinfo):
Fred Drake484d7352000-10-02 21:14:52 +0000982 """Check for errors before writing a file to the archive."""
Raymond Hettinger54f02222002-06-01 14:18:47 +0000983 if zinfo.filename in self.NameToInfo:
Tim Peterse1190062001-01-15 03:34:38 +0000984 if self.debug: # Warning for duplicate names
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000985 print "Duplicate name:", zinfo.filename
986 if self.mode not in ("w", "a"):
987 raise RuntimeError, 'write() requires mode "w" or "a"'
988 if not self.fp:
989 raise RuntimeError, \
Fred Drake5db246d2000-09-29 20:44:48 +0000990 "Attempt to write ZIP archive that was already closed"
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000991 if zinfo.compress_type == ZIP_DEFLATED and not zlib:
992 raise RuntimeError, \
Fred Drake5db246d2000-09-29 20:44:48 +0000993 "Compression requires the (missing) zlib module"
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000994 if zinfo.compress_type not in (ZIP_STORED, ZIP_DEFLATED):
995 raise RuntimeError, \
Fred Drake5db246d2000-09-29 20:44:48 +0000996 "That compression method is not supported"
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000997 if zinfo.file_size > ZIP64_LIMIT:
998 if not self._allowZip64:
999 raise LargeZipFile("Filesize would require ZIP64 extensions")
1000 if zinfo.header_offset > ZIP64_LIMIT:
1001 if not self._allowZip64:
1002 raise LargeZipFile("Zipfile size would require ZIP64 extensions")
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001003
1004 def write(self, filename, arcname=None, compress_type=None):
Fred Drake484d7352000-10-02 21:14:52 +00001005 """Put the bytes from filename into the archive under the name
1006 arcname."""
Georg Brandl4b3ab6f2007-07-12 09:59:22 +00001007 if not self.fp:
1008 raise RuntimeError(
1009 "Attempt to write to ZIP archive that was already closed")
1010
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001011 st = os.stat(filename)
Martin v. Löwis0dfcfc82009-01-24 14:00:33 +00001012 isdir = stat.S_ISDIR(st.st_mode)
Raymond Hettinger32200ae2002-06-01 19:51:15 +00001013 mtime = time.localtime(st.st_mtime)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001014 date_time = mtime[0:6]
1015 # Create ZipInfo instance to store file information
1016 if arcname is None:
Georg Brandl8f7c54e2006-02-20 08:40:38 +00001017 arcname = filename
1018 arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
1019 while arcname[0] in (os.sep, os.altsep):
1020 arcname = arcname[1:]
Martin v. Löwis0dfcfc82009-01-24 14:00:33 +00001021 if isdir:
1022 arcname += '/'
Georg Brandl8f7c54e2006-02-20 08:40:38 +00001023 zinfo = ZipInfo(arcname, date_time)
Andrew M. Kuchling55430212004-07-10 15:40:29 +00001024 zinfo.external_attr = (st[0] & 0xFFFF) << 16L # Unix attributes
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001025 if compress_type is None:
Tim Peterse1190062001-01-15 03:34:38 +00001026 zinfo.compress_type = self.compression
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001027 else:
Tim Peterse1190062001-01-15 03:34:38 +00001028 zinfo.compress_type = compress_type
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001029
1030 zinfo.file_size = st.st_size
Finn Bock03a3bb82001-09-05 18:40:33 +00001031 zinfo.flag_bits = 0x00
Tim Peterse1190062001-01-15 03:34:38 +00001032 zinfo.header_offset = self.fp.tell() # Start of header bytes
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001033
1034 self._writecheck(zinfo)
1035 self._didModify = True
Martin v. Löwis0dfcfc82009-01-24 14:00:33 +00001036
1037 if isdir:
1038 zinfo.file_size = 0
1039 zinfo.compress_size = 0
1040 zinfo.CRC = 0
1041 self.filelist.append(zinfo)
1042 self.NameToInfo[zinfo.filename] = zinfo
1043 self.fp.write(zinfo.FileHeader())
1044 return
1045
Benjamin Petersonb91e8ed2009-05-10 02:29:00 +00001046 with open(filename, "rb") as fp:
1047 # Must overwrite CRC and sizes with correct data later
1048 zinfo.CRC = CRC = 0
1049 zinfo.compress_size = compress_size = 0
1050 zinfo.file_size = file_size = 0
1051 self.fp.write(zinfo.FileHeader())
1052 if zinfo.compress_type == ZIP_DEFLATED:
1053 cmpr = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
1054 zlib.DEFLATED, -15)
1055 else:
1056 cmpr = None
1057 while 1:
1058 buf = fp.read(1024 * 8)
1059 if not buf:
1060 break
1061 file_size = file_size + len(buf)
1062 CRC = crc32(buf, CRC) & 0xffffffff
1063 if cmpr:
1064 buf = cmpr.compress(buf)
1065 compress_size = compress_size + len(buf)
1066 self.fp.write(buf)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001067 if cmpr:
1068 buf = cmpr.flush()
1069 compress_size = compress_size + len(buf)
1070 self.fp.write(buf)
1071 zinfo.compress_size = compress_size
1072 else:
1073 zinfo.compress_size = file_size
1074 zinfo.CRC = CRC
1075 zinfo.file_size = file_size
Finn Bock03a3bb82001-09-05 18:40:33 +00001076 # Seek backwards and write CRC and file sizes
Tim Petersb64bec32001-09-18 02:26:39 +00001077 position = self.fp.tell() # Preserve current position in file
Finn Bock03a3bb82001-09-05 18:40:33 +00001078 self.fp.seek(zinfo.header_offset + 14, 0)
Gregory P. Smithbf02e3b2008-03-19 03:14:41 +00001079 self.fp.write(struct.pack("<LLL", zinfo.CRC, zinfo.compress_size,
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001080 zinfo.file_size))
Finn Bock03a3bb82001-09-05 18:40:33 +00001081 self.fp.seek(position, 0)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001082 self.filelist.append(zinfo)
1083 self.NameToInfo[zinfo.filename] = zinfo
1084
Ronald Oussorendd25e862010-02-07 20:18:02 +00001085 def writestr(self, zinfo_or_arcname, bytes, compress_type=None):
Fred Drake484d7352000-10-02 21:14:52 +00001086 """Write a file into the archive. The contents is the string
Just van Rossumb083cb32002-12-12 12:23:32 +00001087 'bytes'. 'zinfo_or_arcname' is either a ZipInfo instance or
1088 the name of the file in the archive."""
1089 if not isinstance(zinfo_or_arcname, ZipInfo):
1090 zinfo = ZipInfo(filename=zinfo_or_arcname,
Raymond Hettinger351e1a32008-01-14 22:58:05 +00001091 date_time=time.localtime(time.time())[:6])
Ronald Oussorendd25e862010-02-07 20:18:02 +00001092
Just van Rossumb083cb32002-12-12 12:23:32 +00001093 zinfo.compress_type = self.compression
Antoine Pitrou5fdfa3e2008-07-25 19:42:26 +00001094 zinfo.external_attr = 0600 << 16
Just van Rossumb083cb32002-12-12 12:23:32 +00001095 else:
1096 zinfo = zinfo_or_arcname
Georg Brandl4b3ab6f2007-07-12 09:59:22 +00001097
1098 if not self.fp:
1099 raise RuntimeError(
1100 "Attempt to write to ZIP archive that was already closed")
1101
Ronald Oussorendd25e862010-02-07 20:18:02 +00001102 if compress_type is not None:
1103 zinfo.compress_type = compress_type
1104
Tim Peterse1190062001-01-15 03:34:38 +00001105 zinfo.file_size = len(bytes) # Uncompressed size
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001106 zinfo.header_offset = self.fp.tell() # Start of header bytes
1107 self._writecheck(zinfo)
1108 self._didModify = True
Gregory P. Smithbf02e3b2008-03-19 03:14:41 +00001109 zinfo.CRC = crc32(bytes) & 0xffffffff # CRC-32 checksum
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001110 if zinfo.compress_type == ZIP_DEFLATED:
1111 co = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
1112 zlib.DEFLATED, -15)
1113 bytes = co.compress(bytes) + co.flush()
Tim Peterse1190062001-01-15 03:34:38 +00001114 zinfo.compress_size = len(bytes) # Compressed size
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001115 else:
1116 zinfo.compress_size = zinfo.file_size
Tim Peterse1190062001-01-15 03:34:38 +00001117 zinfo.header_offset = self.fp.tell() # Start of header bytes
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001118 self.fp.write(zinfo.FileHeader())
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001119 self.fp.write(bytes)
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001120 self.fp.flush()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001121 if zinfo.flag_bits & 0x08:
Tim Peterse1190062001-01-15 03:34:38 +00001122 # Write CRC and file sizes after the file data
Gregory P. Smith26627332009-06-26 07:50:21 +00001123 self.fp.write(struct.pack("<LLL", zinfo.CRC, zinfo.compress_size,
Tim Peterse1190062001-01-15 03:34:38 +00001124 zinfo.file_size))
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001125 self.filelist.append(zinfo)
1126 self.NameToInfo[zinfo.filename] = zinfo
1127
1128 def __del__(self):
Fred Drake484d7352000-10-02 21:14:52 +00001129 """Call the "close()" method in case the user forgot."""
Tim Petersd15f8bb2001-11-28 23:16:40 +00001130 self.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001131
1132 def close(self):
Fred Drake484d7352000-10-02 21:14:52 +00001133 """Close the file, and for mode "w" and "a" write the ending
1134 records."""
Tim Petersd15f8bb2001-11-28 23:16:40 +00001135 if self.fp is None:
1136 return
Tim Petersa608bb22006-06-15 18:06:29 +00001137
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001138 if self.mode in ("w", "a") and self._didModify: # write ending records
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001139 count = 0
1140 pos1 = self.fp.tell()
Tim Peterse1190062001-01-15 03:34:38 +00001141 for zinfo in self.filelist: # write central directory
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001142 count = count + 1
1143 dt = zinfo.date_time
1144 dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
Tim Peters3caca232001-12-06 06:23:26 +00001145 dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001146 extra = []
1147 if zinfo.file_size > ZIP64_LIMIT \
1148 or zinfo.compress_size > ZIP64_LIMIT:
1149 extra.append(zinfo.file_size)
1150 extra.append(zinfo.compress_size)
Martin v. Löwis8c436412008-07-03 12:51:14 +00001151 file_size = 0xffffffff
1152 compress_size = 0xffffffff
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001153 else:
1154 file_size = zinfo.file_size
1155 compress_size = zinfo.compress_size
1156
1157 if zinfo.header_offset > ZIP64_LIMIT:
1158 extra.append(zinfo.header_offset)
Martin v. Löwis8c436412008-07-03 12:51:14 +00001159 header_offset = 0xffffffffL
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001160 else:
1161 header_offset = zinfo.header_offset
1162
1163 extra_data = zinfo.extra
1164 if extra:
1165 # Append a ZIP64 field to the extra's
1166 extra_data = struct.pack(
Gregory P. Smithb89a0962008-03-19 01:46:10 +00001167 '<HH' + 'Q'*len(extra),
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001168 1, 8*len(extra), *extra) + extra_data
Tim Petersa608bb22006-06-15 18:06:29 +00001169
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001170 extract_version = max(45, zinfo.extract_version)
1171 create_version = max(45, zinfo.create_version)
1172 else:
1173 extract_version = zinfo.extract_version
1174 create_version = zinfo.create_version
1175
Gregory P. Smithbf02e3b2008-03-19 03:14:41 +00001176 try:
Martin v. Löwis471617d2008-05-05 17:16:58 +00001177 filename, flag_bits = zinfo._encodeFilenameFlags()
Gregory P. Smithbf02e3b2008-03-19 03:14:41 +00001178 centdir = struct.pack(structCentralDir,
Amaury Forgeot d'Arcae6d2b92008-07-11 21:28:25 +00001179 stringCentralDir, create_version,
Gregory P. Smithbf02e3b2008-03-19 03:14:41 +00001180 zinfo.create_system, extract_version, zinfo.reserved,
Martin v. Löwis471617d2008-05-05 17:16:58 +00001181 flag_bits, zinfo.compress_type, dostime, dosdate,
Gregory P. Smithbf02e3b2008-03-19 03:14:41 +00001182 zinfo.CRC, compress_size, file_size,
Martin v. Löwis471617d2008-05-05 17:16:58 +00001183 len(filename), len(extra_data), len(zinfo.comment),
Gregory P. Smithbf02e3b2008-03-19 03:14:41 +00001184 0, zinfo.internal_attr, zinfo.external_attr,
1185 header_offset)
1186 except DeprecationWarning:
1187 print >>sys.stderr, (structCentralDir,
1188 stringCentralDir, create_version,
1189 zinfo.create_system, extract_version, zinfo.reserved,
1190 zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
1191 zinfo.CRC, compress_size, file_size,
1192 len(zinfo.filename), len(extra_data), len(zinfo.comment),
1193 0, zinfo.internal_attr, zinfo.external_attr,
1194 header_offset)
1195 raise
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001196 self.fp.write(centdir)
Martin v. Löwis471617d2008-05-05 17:16:58 +00001197 self.fp.write(filename)
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001198 self.fp.write(extra_data)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001199 self.fp.write(zinfo.comment)
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001200
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001201 pos2 = self.fp.tell()
1202 # Write end-of-zip-archive record
Amaury Forgeot d'Arcd25f87a2009-01-17 16:40:17 +00001203 centDirCount = count
1204 centDirSize = pos2 - pos1
Martin v. Löwis8c436412008-07-03 12:51:14 +00001205 centDirOffset = pos1
Amaury Forgeot d'Arcd25f87a2009-01-17 16:40:17 +00001206 if (centDirCount >= ZIP_FILECOUNT_LIMIT or
1207 centDirOffset > ZIP64_LIMIT or
1208 centDirSize > ZIP64_LIMIT):
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001209 # Need to write the ZIP64 end-of-archive records
1210 zip64endrec = struct.pack(
Amaury Forgeot d'Arcae6d2b92008-07-11 21:28:25 +00001211 structEndArchive64, stringEndArchive64,
Amaury Forgeot d'Arcd25f87a2009-01-17 16:40:17 +00001212 44, 45, 45, 0, 0, centDirCount, centDirCount,
1213 centDirSize, centDirOffset)
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001214 self.fp.write(zip64endrec)
1215
1216 zip64locrec = struct.pack(
Amaury Forgeot d'Arcae6d2b92008-07-11 21:28:25 +00001217 structEndArchive64Locator,
1218 stringEndArchive64Locator, 0, pos2, 1)
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001219 self.fp.write(zip64locrec)
Amaury Forgeot d'Arcd25f87a2009-01-17 16:40:17 +00001220 centDirCount = min(centDirCount, 0xFFFF)
1221 centDirSize = min(centDirSize, 0xFFFFFFFF)
1222 centDirOffset = min(centDirOffset, 0xFFFFFFFF)
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001223
Martin v. Löwis8c436412008-07-03 12:51:14 +00001224 # check for valid comment length
1225 if len(self.comment) >= ZIP_MAX_COMMENT:
1226 if self.debug > 0:
1227 msg = 'Archive comment is too long; truncating to %d bytes' \
1228 % ZIP_MAX_COMMENT
1229 self.comment = self.comment[:ZIP_MAX_COMMENT]
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001230
Amaury Forgeot d'Arcae6d2b92008-07-11 21:28:25 +00001231 endrec = struct.pack(structEndArchive, stringEndArchive,
Amaury Forgeot d'Arcd25f87a2009-01-17 16:40:17 +00001232 0, 0, centDirCount, centDirCount,
1233 centDirSize, centDirOffset, len(self.comment))
Martin v. Löwis8c436412008-07-03 12:51:14 +00001234 self.fp.write(endrec)
1235 self.fp.write(self.comment)
Guido van Rossumf85af612001-04-14 16:45:14 +00001236 self.fp.flush()
Martin v. Löwis8c436412008-07-03 12:51:14 +00001237
Fred Drake3d9091e2001-03-26 15:49:24 +00001238 if not self._filePassed:
1239 self.fp.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001240 self.fp = None
1241
1242
class PyZipFile(ZipFile):
    """Class to create ZIP archives with Python library files and packages."""

    def writepy(self, pathname, basename = ""):
        """Add all files from "pathname" to the ZIP archive.

        If pathname is a package directory (it contains __init__.py),
        add the package and, recursively, all of its sub-packages; every
        *.py module found is entered into the archive under the package
        name.  If pathname is a plain directory, every *.py file directly
        inside it is added at the current archive level.  Otherwise
        pathname must be a Python *.py file and that single module is
        added.  Added modules are always module.pyo or module.pyc; the
        module.py is compiled into module.pyc if necessary.

        basename is the archive-side package prefix ("" for top level); it
        is mainly used by the recursive calls.

        Raises RuntimeError if pathname is a file not ending in ".py".
        """
        if not os.path.isdir(pathname):
            if not pathname.endswith(".py"):
                raise RuntimeError(
                    'Files added with writepy() must end with ".py"')
            self._write_module(pathname[:-3], basename, "Adding file")
            return

        initname = os.path.join(pathname, "__init__.py")
        if os.path.isfile(initname):
            # This is a package directory, add it.
            # Normalise first so that a trailing separator ("pkg/") still
            # yields "pkg"; a bare os.path.split() would give an empty name
            # and the package prefix would silently be dropped.
            name = os.path.basename(os.path.normpath(pathname))
            if basename:
                basename = "%s/%s" % (basename, name)
            else:
                basename = name
            if self.debug:
                print("Adding package in %s as %s" % (pathname, basename))
            self._write_module(initname[:-3], basename)
            # Add all *.py files and package subdirectories
            for filename in os.listdir(pathname):
                if filename == "__init__.py":
                    continue    # already written above
                path = os.path.join(pathname, filename)
                if os.path.isdir(path):
                    if os.path.isfile(os.path.join(path, "__init__.py")):
                        # This is a package directory, add it
                        self.writepy(path, basename)  # Recursive call
                elif os.path.splitext(filename)[1] == ".py":
                    self._write_module(path[:-3], basename)
        else:
            # This is NOT a package directory, add its files at top level
            if self.debug:
                print("Adding files from directory %s" % (pathname,))
            for filename in os.listdir(pathname):
                if os.path.splitext(filename)[1] == ".py":
                    path = os.path.join(pathname, filename)
                    self._write_module(path[:-3], basename)

    def _write_module(self, module_path, basename, label="Adding"):
        """Write the compiled file for module_path (no extension) to the archive.

        label is the prefix of the message printed when self.debug is set.
        """
        fname, arcname = self._get_codename(module_path, basename)
        if self.debug:
            print("%s %s" % (label, arcname))
        self.write(fname, arcname)

    def _get_codename(self, pathname, basename):
        """Return (filename, archivename) for the path.

        Given a module name path, return the correct file path and
        archive name, compiling if necessary.  For example, given
        /python/lib/string, return (/python/lib/string.pyc, string).

        An existing .pyo at least as new as the .py is preferred; otherwise
        the .pyc is used, after (re)compiling it when it is missing or older
        than the .py.  A compile error is printed, not raised, and the .pyc
        path is still returned.
        """
        file_py  = pathname + ".py"
        file_pyc = pathname + ".pyc"
        file_pyo = pathname + ".pyo"
        if os.path.isfile(file_pyo) and \
                os.stat(file_pyo).st_mtime >= os.stat(file_py).st_mtime:
            fname = file_pyo    # Use .pyo file
        elif not os.path.isfile(file_pyc) or \
                os.stat(file_pyc).st_mtime < os.stat(file_py).st_mtime:
            import py_compile
            if self.debug:
                print("Compiling %s" % (file_py,))
            try:
                # doraise=True so that failures surface as PyCompileError
                py_compile.compile(file_py, file_pyc, None, True)
            except py_compile.PyCompileError as err:
                print(err.msg)
            fname = file_pyc
        else:
            fname = file_pyc
        archivename = os.path.split(fname)[1]
        if basename:
            archivename = "%s/%s" % (basename, archivename)
        return (fname, archivename)
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001340
1341
def main(args = None):
    """Command-line interface: list, test, extract or create a ZIP archive.

    args is the argument list without the program name; when None,
    sys.argv[1:] is used.  The first argument selects the action:

      -l zipfile          print a listing of the archive
      -t zipfile          run testzip() and report the first corrupt member
      -e zipfile target   extract every member below the target directory
      -c zipfile src ...  create the archive from the given files/directories

    On a missing/unknown option or a wrong argument count the usage text is
    printed and sys.exit(1) is called.  The archive is always closed, even
    when the selected action fails.
    """
    import textwrap
    USAGE=textwrap.dedent("""\
        Usage:
            zipfile.py -l zipfile.zip        # Show listing of a zipfile
            zipfile.py -t zipfile.zip        # Test if a zipfile is valid
            zipfile.py -e zipfile.zip target # Extract zipfile into target dir
            zipfile.py -c zipfile.zip src ... # Create zipfile from sources
        """)
    if args is None:
        args = sys.argv[1:]

    if not args or args[0] not in ('-l', '-c', '-e', '-t'):
        print(USAGE)
        sys.exit(1)

    if args[0] == '-l':
        if len(args) != 2:
            print(USAGE)
            sys.exit(1)
        zf = ZipFile(args[1], 'r')
        try:
            zf.printdir()
        finally:
            zf.close()

    elif args[0] == '-t':
        if len(args) != 2:
            print(USAGE)
            sys.exit(1)
        zf = ZipFile(args[1], 'r')
        try:
            badfile = zf.testzip()
        finally:
            zf.close()
        if badfile:
            print("The following enclosed file is corrupted: {!r}".format(badfile))
        print("Done testing")

    elif args[0] == '-e':
        if len(args) != 3:
            print(USAGE)
            sys.exit(1)

        out = args[2]
        zf = ZipFile(args[1], 'r')
        try:
            for path in zf.namelist():
                # Member names come from the (untrusted) archive: drop any
                # drive, leading separators and "."/".." components so that
                # nothing can be written outside of the target directory.
                # This also covers the historical stripping of a "./" prefix.
                local = os.path.splitdrive(path.replace('/', os.sep))[1]
                parts = [p for p in local.split(os.sep)
                         if p not in ('', os.curdir, os.pardir)]
                if not parts:
                    continue
                tgt = os.path.join(out, *parts)

                if path.endswith('/'):
                    # Directory entry: create it, there is no data to write.
                    if not os.path.isdir(tgt):
                        os.makedirs(tgt)
                    continue

                tgtdir = os.path.dirname(tgt)
                # tgtdir is '' when extracting a top-level member into ''.
                if tgtdir and not os.path.exists(tgtdir):
                    os.makedirs(tgtdir)
                with open(tgt, 'wb') as fp:
                    fp.write(zf.read(path))
        finally:
            zf.close()

    elif args[0] == '-c':
        if len(args) < 3:
            print(USAGE)
            sys.exit(1)

        def addToZip(zf, path, zippath):
            # Files are stored deflated; directories are walked recursively;
            # anything else (missing path, special file) is ignored.
            if os.path.isfile(path):
                zf.write(path, zippath, ZIP_DEFLATED)
            elif os.path.isdir(path):
                for nm in os.listdir(path):
                    addToZip(zf,
                            os.path.join(path, nm), os.path.join(zippath, nm))
            # else: ignore

        zf = ZipFile(args[1], 'w', allowZip64=True)
        try:
            for src in args[2:]:
                addToZip(zf, src, os.path.basename(src))
        finally:
            zf.close()
1415
# Run the command-line interface only when this module is executed as a
# script; importing it has no such side effect.
if __name__ == "__main__":
    main()