blob: 34b3f1a7749896784720523664f2377d2ff9e457 [file] [log] [blame]
"""
Read and write ZIP files.
"""
Georg Brandl62416bc2008-01-07 18:47:44 +00004import struct, os, time, sys, shutil
Martin v. Löwis3a8071a2009-01-24 14:04:33 +00005import binascii, cStringIO, stat
Guido van Rossum32abe6f2000-03-31 17:30:02 +00006
7try:
Tim Peterse1190062001-01-15 03:34:38 +00008 import zlib # We may need its compression method
Gregory P. Smithb89a0962008-03-19 01:46:10 +00009 crc32 = zlib.crc32
Guido van Rossum9c673f32001-04-10 15:37:12 +000010except ImportError:
Guido van Rossum32abe6f2000-03-31 17:30:02 +000011 zlib = None
Gregory P. Smithb89a0962008-03-19 01:46:10 +000012 crc32 = binascii.crc32
Guido van Rossum32abe6f2000-03-31 17:30:02 +000013
# Names exported by "from zipfile import *".
__all__ = ["BadZipfile", "error", "ZIP_STORED", "ZIP_DEFLATED", "is_zipfile",
           "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile"]
Skip Montanaro40fc1602001-03-01 04:27:19 +000016
class BadZipfile(Exception):
    """Raised when a file cannot be interpreted as a supported ZIP archive."""
Ronald Oussoren143cefb2006-06-15 08:14:18 +000019
20
class LargeZipFile(Exception):
    """
    Raised when writing a zipfile, the zipfile requires ZIP64 extensions
    and those extensions are disabled.
    """
26
error = BadZipfile      # The exception raised by this module
Guido van Rossum32abe6f2000-03-31 17:30:02 +000028
# Sizes above ZIP64_LIMIT are stored through the ZIP64 extra field instead of
# the regular 32-bit header fields (see ZipInfo.FileHeader).
ZIP64_LIMIT = (1 << 31) - 1
ZIP_FILECOUNT_LIMIT = 1 << 16
# The archive comment length is stored in a 2-byte field.
ZIP_MAX_COMMENT = (1 << 16) - 1

# constants for Zip file compression methods
ZIP_STORED = 0
ZIP_DEFLATED = 8
# Other ZIP compression methods not supported
37
# Below are some formats and associated data for reading/writing headers using
# the struct module.  The names and structures of headers/records are those used
# in the PKWARE description of the ZIP file format:
#     http://www.pkware.com/documents/casestudies/APPNOTE.TXT
# (URL valid as of January 2008)

# The "end of central directory" structure, magic number, size, and indices
# (section V.I in the format document)
structEndArchive = "<4s4H2LH"
stringEndArchive = "PK\005\006"
sizeEndCentDir = struct.calcsize(structEndArchive)

_ECD_SIGNATURE = 0
_ECD_DISK_NUMBER = 1
_ECD_DISK_START = 2
_ECD_ENTRIES_THIS_DISK = 3
_ECD_ENTRIES_TOTAL = 4
_ECD_SIZE = 5
_ECD_OFFSET = 6
_ECD_COMMENT_SIZE = 7
# These last two indices are not part of the structure as defined in the
# spec, but they are used internally by this module as a convenience
_ECD_COMMENT = 8
_ECD_LOCATION = 9
61_ECD_LOCATION = 9
62
# The "central directory" structure, magic number, size, and indices
# of entries in the structure (section V.F in the format document)
structCentralDir = "<4s4B4HL2L5H2L"
stringCentralDir = "PK\001\002"
sizeCentralDir = struct.calcsize(structCentralDir)

# indexes of entries in the central directory structure
_CD_SIGNATURE = 0
_CD_CREATE_VERSION = 1
_CD_CREATE_SYSTEM = 2
_CD_EXTRACT_VERSION = 3
_CD_EXTRACT_SYSTEM = 4
_CD_FLAG_BITS = 5
_CD_COMPRESS_TYPE = 6
_CD_TIME = 7
_CD_DATE = 8
_CD_CRC = 9
_CD_COMPRESSED_SIZE = 10
_CD_UNCOMPRESSED_SIZE = 11
_CD_FILENAME_LENGTH = 12
_CD_EXTRA_FIELD_LENGTH = 13
_CD_COMMENT_LENGTH = 14
_CD_DISK_NUMBER_START = 15
_CD_INTERNAL_FILE_ATTRIBUTES = 16
_CD_EXTERNAL_FILE_ATTRIBUTES = 17
_CD_LOCAL_HEADER_OFFSET = 18
89
# The "local file header" structure, magic number, size, and indices
# (section V.A in the format document)
structFileHeader = "<4s2B4HL2L2H"
stringFileHeader = "PK\003\004"
sizeFileHeader = struct.calcsize(structFileHeader)

_FH_SIGNATURE = 0
_FH_EXTRACT_VERSION = 1
_FH_EXTRACT_SYSTEM = 2
_FH_GENERAL_PURPOSE_FLAG_BITS = 3
_FH_COMPRESSION_METHOD = 4
_FH_LAST_MOD_TIME = 5
_FH_LAST_MOD_DATE = 6
_FH_CRC = 7
_FH_COMPRESSED_SIZE = 8
_FH_UNCOMPRESSED_SIZE = 9
_FH_FILENAME_LENGTH = 10
_FH_EXTRA_FIELD_LENGTH = 11
108
# The "Zip64 end of central directory locator" structure, magic number, and size
structEndArchive64Locator = "<4sLQL"
stringEndArchive64Locator = "PK\x06\x07"
sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator)
Martin v. Löwis8c436412008-07-03 12:51:14 +0000113
# The "Zip64 end of central directory" record, magic number, size, and indices
# (section V.G in the format document)
structEndArchive64 = "<4sQ2H2L4Q"
stringEndArchive64 = "PK\x06\x06"
sizeEndCentDir64 = struct.calcsize(structEndArchive64)

_CD64_SIGNATURE = 0
_CD64_DIRECTORY_RECSIZE = 1
_CD64_CREATE_VERSION = 2
_CD64_EXTRACT_VERSION = 3
_CD64_DISK_NUMBER = 4
_CD64_DISK_NUMBER_START = 5
_CD64_NUMBER_ENTRIES_THIS_DISK = 6
_CD64_NUMBER_ENTRIES_TOTAL = 7
_CD64_DIRECTORY_SIZE = 8
_CD64_OFFSET_START_CENTDIR = 9
130
def is_zipfile(filename):
    """Quickly see if file is a ZIP file by checking the magic number.

    filename: path of the file to inspect.

    Returns True when an "end of central directory" record is found by
    _EndRecData(), False otherwise.  An IOError while opening or reading the
    file is treated as "not a ZIP file" rather than propagated.
    """
    try:
        fpin = open(filename, "rb")
        try:
            endrec = _EndRecData(fpin)
        finally:
            # Always release the handle, even if reading the record fails.
            fpin.close()
        if endrec:
            return True                 # file has correct magic number
    except IOError:
        pass
    return False
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000142
def _EndRecData64(fpin, offset, endrec):
    """
    Read the ZIP64 end-of-archive records and use that to update endrec

    fpin:   seekable file object positioned anywhere in the archive.
    offset: position of the regular "end of central directory" record,
            expressed relative to the end of the file (so it is negative).
    endrec: list built by _EndRecData(); updated in place when a ZIP64
            record is found.

    Returns endrec (unchanged when no usable ZIP64 structures are present).
    Raises BadZipfile for archives that span multiple disks.
    """
    try:
        fpin.seek(offset - sizeEndCentDir64Locator, 2)
    except IOError:
        # The file is too small to hold a ZIP64 locator in front of the
        # regular end record, so there is nothing to merge.
        return endrec

    data = fpin.read(sizeEndCentDir64Locator)
    if len(data) != sizeEndCentDir64Locator:
        # Short read: unpacking would fail, and there is no locator anyway.
        return endrec
    sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, data)
    if sig != stringEndArchive64Locator:
        return endrec

    if diskno != 0 or disks != 1:
        raise BadZipfile("zipfiles that span multiple disks are not supported")

    # Assume no 'zip64 extensible data'
    try:
        fpin.seek(offset - sizeEndCentDir64Locator - sizeEndCentDir64, 2)
    except IOError:
        return endrec
    data = fpin.read(sizeEndCentDir64)
    if len(data) != sizeEndCentDir64:
        return endrec
    sig, sz, create_version, read_version, disk_num, disk_dir, \
            dircount, dircount2, dirsize, diroffset = \
            struct.unpack(structEndArchive64, data)
    if sig != stringEndArchive64:
        return endrec

    # Update the original endrec using data from the ZIP64 record
    endrec[_ECD_SIGNATURE] = sig
    endrec[_ECD_DISK_NUMBER] = disk_num
    endrec[_ECD_DISK_START] = disk_dir
    endrec[_ECD_ENTRIES_THIS_DISK] = dircount
    endrec[_ECD_ENTRIES_TOTAL] = dircount2
    endrec[_ECD_SIZE] = dirsize
    endrec[_ECD_OFFSET] = diroffset
    return endrec
174
175
def _EndRecData(fpin):
    """Return data from the "End of Central Directory" record, or None.

    The data is a list of the nine items in the ZIP "End of central dir"
    record followed by a tenth item, the file seek offset of this record.

    fpin must be a seekable file object.  When ZIP64 structures precede the
    record, the returned list carries the values merged in by
    _EndRecData64()."""

    # Determine file size
    fpin.seek(0, 2)
    filesize = fpin.tell()

    # Check to see if this is ZIP file with no archive comment (the
    # "end of central directory" structure should be the last item in the
    # file if this is the case).
    try:
        fpin.seek(-sizeEndCentDir, 2)
    except IOError:
        return None
    data = fpin.read()
    # The length check keeps struct.unpack from failing on file objects
    # whose seek() silently clamps instead of raising for tiny files.
    if (len(data) == sizeEndCentDir and
            data[0:4] == stringEndArchive and data[-2:] == "\000\000"):
        # the signature is correct and there's no comment, unpack structure
        endrec = list(struct.unpack(structEndArchive, data))

        # Append a blank comment and record start offset
        endrec.append("")
        endrec.append(filesize - sizeEndCentDir)

        # Try to read the "Zip64 end of central directory" structure
        return _EndRecData64(fpin, -sizeEndCentDir, endrec)

    # Either this is not a ZIP file, or it is a ZIP file with an archive
    # comment.  Search the end of the file for the "end of central directory"
    # record signature. The comment is the last item in the ZIP file and may be
    # up to 64K long.  It is assumed that the "end of central directory" magic
    # number does not appear in the comment.
    maxCommentStart = max(filesize - (1 << 16) - sizeEndCentDir, 0)
    fpin.seek(maxCommentStart, 0)
    data = fpin.read()
    start = data.rfind(stringEndArchive)
    if start >= 0:
        # found the magic number; attempt to unpack and interpret
        recData = data[start:start+sizeEndCentDir]
        if len(recData) != sizeEndCentDir:
            # The signature sits too close to the end of the file for a
            # complete record to follow it.
            return None
        endrec = list(struct.unpack(structEndArchive, recData))
        comment = data[start+sizeEndCentDir:]
        # check that comment length is correct
        if endrec[_ECD_COMMENT_SIZE] == len(comment):
            # Append the archive comment and start offset
            endrec.append(comment)
            endrec.append(maxCommentStart + start)

            # Try to read the "Zip64 end of central directory" structure
            return _EndRecData64(fpin, maxCommentStart + start - filesize,
                                 endrec)

    # Unable to find a valid end of central directory structure
    return None
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000232
Fred Drake484d7352000-10-02 21:14:52 +0000233
class ZipInfo (object):
    """Class with attributes describing each file in the ZIP archive."""

    __slots__ = (
            'orig_filename',
            'filename',
            'date_time',
            'compress_type',
            'comment',
            'extra',
            'create_system',
            'create_version',
            'extract_version',
            'reserved',
            'flag_bits',
            'volume',
            'internal_attr',
            'external_attr',
            'header_offset',
            'CRC',
            'compress_size',
            'file_size',
            '_raw_time',
        )

    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
        """Create the description of one archive member.

        filename:  member name; it is cut at the first null byte and
                   os.sep is replaced by "/".
        date_time: (year, month, day, hour, min, sec) tuple.
        """
        self.orig_filename = filename   # Original file name in archive

        # Terminate the file name at the first null byte.  Null bytes in file
        # names are used as tricks by viruses in archives.
        null_byte = filename.find(chr(0))
        if null_byte >= 0:
            filename = filename[0:null_byte]
        # This is used to ensure paths in generated ZIP files always use
        # forward slashes as the directory separator, as required by the
        # ZIP format specification.
        if os.sep != "/" and os.sep in filename:
            filename = filename.replace(os.sep, "/")

        self.filename = filename        # Normalized file name
        self.date_time = date_time      # year, month, day, hour, min, sec
        # Standard values:
        self.compress_type = ZIP_STORED # Type of compression for the file
        self.comment = ""               # Comment for each file
        self.extra = ""                 # ZIP extra data
        if sys.platform == 'win32':
            self.create_system = 0          # System which created ZIP archive
        else:
            # Assume everything else is unix-y
            self.create_system = 3          # System which created ZIP archive
        self.create_version = 20        # Version which created ZIP archive
        self.extract_version = 20       # Version needed to extract archive
        self.reserved = 0               # Must be zero
        self.flag_bits = 0              # ZIP flag bits
        self.volume = 0                 # Volume number of file header
        self.internal_attr = 0          # Internal attributes
        self.external_attr = 0          # External file attributes
        # Other attributes are set by class ZipFile:
        # header_offset         Byte offset to the file header
        # CRC                   CRC-32 of the uncompressed file
        # compress_size         Size of the compressed file
        # file_size             Size of the uncompressed file

    def FileHeader(self):
        """Return the per-file header as a string.

        When either size exceeds ZIP64_LIMIT a ZIP64 extra record is appended
        to the returned header and extract_version / create_version are
        raised to at least 45 on this object.
        """
        dt = self.date_time
        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
        if self.flag_bits & 0x08:
            # Set these to zero because we write them after the file data
            CRC = compress_size = file_size = 0
        else:
            CRC = self.CRC
            compress_size = self.compress_size
            file_size = self.file_size

        extra = self.extra

        if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
            # File is larger than what fits into a 4 byte integer,
            # fall back to the ZIP64 extension
            fmt = '<HHQQ'
            extra = extra + struct.pack(fmt,
                    1, struct.calcsize(fmt)-4, file_size, compress_size)
            file_size = 0xffffffff
            compress_size = 0xffffffff
            self.extract_version = max(45, self.extract_version)
            # Raise create_version from its own value; deriving it from
            # extract_version would overwrite a higher creator version.
            self.create_version = max(45, self.create_version)

        filename, flag_bits = self._encodeFilenameFlags()
        header = struct.pack(structFileHeader, stringFileHeader,
                 self.extract_version, self.reserved, flag_bits,
                 self.compress_type, dostime, dosdate, CRC,
                 compress_size, file_size,
                 len(filename), len(extra))
        return header + filename + extra

    def _encodeFilenameFlags(self):
        """Return (encoded filename, flag bits) for writing a header.

        Unicode names are written as ASCII when possible; otherwise they are
        written as UTF-8 and bit 0x800 is added to the returned flags.
        """
        if isinstance(self.filename, unicode):
            try:
                return self.filename.encode('ascii'), self.flag_bits
            except UnicodeEncodeError:
                return self.filename.encode('utf-8'), self.flag_bits | 0x800
        else:
            return self.filename, self.flag_bits

    def _decodeFilename(self):
        """Return the filename, decoded from UTF-8 when flag bit 0x800 is set."""
        if self.flag_bits & 0x800:
            return self.filename.decode('utf-8')
        else:
            return self.filename

    def _decodeExtra(self):
        """Apply ZIP64 values found in the extra field to this object.

        Walks the (type, length, payload) records in self.extra.  For a
        record of type 1 the 64-bit values replace file_size, compress_size
        and header_offset, in that order, for each of those attributes that
        currently holds the 0xffffffff placeholder.

        Raises RuntimeError when a type 1 record has an unsupported length.
        """
        # Try to decode the extra field.
        extra = self.extra
        unpack = struct.unpack
        # A record needs a complete 4-byte (type, length) header; stop on a
        # shorter remainder instead of letting unpack() fail on it.
        while len(extra) >= 4:
            tp, ln = unpack('<HH', extra[:4])
            if tp == 1:
                if ln >= 24:
                    counts = unpack('<QQQ', extra[4:28])
                elif ln == 16:
                    counts = unpack('<QQ', extra[4:20])
                elif ln == 8:
                    counts = unpack('<Q', extra[4:12])
                elif ln == 0:
                    counts = ()
                else:
                    raise RuntimeError("Corrupt extra field %s"%(ln,))

                idx = 0

                # ZIP64 extension (large files and/or large archives)
                if self.file_size in (0xffffffffffffffff, 0xffffffff):
                    self.file_size = counts[idx]
                    idx += 1

                if self.compress_size == 0xFFFFFFFF:
                    self.compress_size = counts[idx]
                    idx += 1

                if self.header_offset == 0xffffffff:
                    self.header_offset = counts[idx]
                    idx += 1

            extra = extra[ln+4:]
Tim Petersa608bb22006-06-15 18:06:29 +0000381
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000382
class _ZipDecrypter:
    """Class to handle decryption of files stored within a ZIP archive.

    ZIP supports a password-based form of encryption. Even though known
    plaintext attacks have been found against it, it is still useful
    to be able to get data out of such a file.

    Usage:
        zd = _ZipDecrypter(mypwd)
        plain_char = zd(cypher_char)
        plain_text = map(zd, cypher_text)
    """

    def _GenerateCRCTable():
        """Generate a CRC-32 table.

        ZIP encryption uses the CRC32 one-byte primitive for scrambling some
        internal keys. We noticed that a direct implementation is faster than
        relying on binascii.crc32().
        """
        poly = 0xedb88320
        table = [0] * 256
        for i in range(256):
            crc = i
            for j in range(8):
                if crc & 1:
                    crc = ((crc >> 1) & 0x7FFFFFFF) ^ poly
                else:
                    crc = ((crc >> 1) & 0x7FFFFFFF)
            table[i] = crc
        return table
    # Built once, while the class body executes, and shared by all instances.
    crctable = _GenerateCRCTable()

    def _crc32(self, ch, crc):
        """Compute the CRC32 primitive on one byte."""
        return ((crc >> 8) & 0xffffff) ^ self.crctable[(crc ^ ord(ch)) & 0xff]

    def __init__(self, pwd):
        """Initialise the three keys and mix in every character of pwd."""
        self.key0 = 305419896
        self.key1 = 591751049
        self.key2 = 878082192
        for p in pwd:
            self._UpdateKeys(p)

    def _UpdateKeys(self, c):
        """Advance the three keys using the single character c."""
        self.key0 = self._crc32(c, self.key0)
        self.key1 = (self.key1 + (self.key0 & 255)) & 4294967295
        self.key1 = (self.key1 * 134775813 + 1) & 4294967295
        self.key2 = self._crc32(chr((self.key1 >> 24) & 255), self.key2)

    def __call__(self, c):
        """Decrypt a single character."""
        c = ord(c)
        k = self.key2 | 2
        c = c ^ (((k * (k^1)) >> 8) & 255)
        c = chr(c)
        # The keys are updated with the decrypted character.
        self._UpdateKeys(c)
        return c
441
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000442class ZipExtFile:
443 """File-like object for reading an archive member.
Tim Petersea5962f2007-03-12 18:07:52 +0000444 Is returned by ZipFile.open().
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000445 """
Tim Petersea5962f2007-03-12 18:07:52 +0000446
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000447 def __init__(self, fileobj, zipinfo, decrypt=None):
448 self.fileobj = fileobj
449 self.decrypter = decrypt
450 self.bytes_read = 0L
451 self.rawbuffer = ''
452 self.readbuffer = ''
453 self.linebuffer = ''
454 self.eof = False
455 self.univ_newlines = False
456 self.nlSeps = ("\n", )
457 self.lastdiscard = ''
458
459 self.compress_type = zipinfo.compress_type
460 self.compress_size = zipinfo.compress_size
Tim Petersea5962f2007-03-12 18:07:52 +0000461
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000462 self.closed = False
463 self.mode = "r"
464 self.name = zipinfo.filename
465
466 # read from compressed files in 64k blocks
467 self.compreadsize = 64*1024
468 if self.compress_type == ZIP_DEFLATED:
469 self.dc = zlib.decompressobj(-15)
470
471 def set_univ_newlines(self, univ_newlines):
472 self.univ_newlines = univ_newlines
Tim Petersea5962f2007-03-12 18:07:52 +0000473
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000474 # pick line separator char(s) based on universal newlines flag
475 self.nlSeps = ("\n", )
476 if self.univ_newlines:
477 self.nlSeps = ("\r\n", "\r", "\n")
478
479 def __iter__(self):
480 return self
Tim Petersea5962f2007-03-12 18:07:52 +0000481
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000482 def next(self):
483 nextline = self.readline()
484 if not nextline:
485 raise StopIteration()
486
487 return nextline
488
489 def close(self):
490 self.closed = True
491
492 def _checkfornewline(self):
493 nl, nllen = -1, -1
494 if self.linebuffer:
495 # ugly check for cases where half of an \r\n pair was
496 # read on the last pass, and the \r was discarded. In this
497 # case we just throw away the \n at the start of the buffer.
498 if (self.lastdiscard, self.linebuffer[0]) == ('\r','\n'):
499 self.linebuffer = self.linebuffer[1:]
500
Tim Petersea5962f2007-03-12 18:07:52 +0000501 for sep in self.nlSeps:
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000502 nl = self.linebuffer.find(sep)
503 if nl >= 0:
504 nllen = len(sep)
505 return nl, nllen
506
507 return nl, nllen
Tim Petersea5962f2007-03-12 18:07:52 +0000508
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000509 def readline(self, size = -1):
510 """Read a line with approx. size. If size is negative,
Tim Petersea5962f2007-03-12 18:07:52 +0000511 read a whole line.
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000512 """
513 if size < 0:
514 size = sys.maxint
515 elif size == 0:
516 return ''
517
518 # check for a newline already in buffer
519 nl, nllen = self._checkfornewline()
Tim Petersea5962f2007-03-12 18:07:52 +0000520
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000521 if nl >= 0:
522 # the next line was already in the buffer
523 nl = min(nl, size)
524 else:
525 # no line break in buffer - try to read more
526 size -= len(self.linebuffer)
527 while nl < 0 and size > 0:
528 buf = self.read(min(size, 100))
529 if not buf:
530 break
531 self.linebuffer += buf
532 size -= len(buf)
533
534 # check for a newline in buffer
535 nl, nllen = self._checkfornewline()
Tim Petersea5962f2007-03-12 18:07:52 +0000536
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000537 # we either ran out of bytes in the file, or
538 # met the specified size limit without finding a newline,
539 # so return current buffer
540 if nl < 0:
541 s = self.linebuffer
542 self.linebuffer = ''
543 return s
544
545 buf = self.linebuffer[:nl]
546 self.lastdiscard = self.linebuffer[nl:nl + nllen]
547 self.linebuffer = self.linebuffer[nl + nllen:]
548
549 # line is always returned with \n as newline char (except possibly
550 # for a final incomplete line in the file, which is handled above).
551 return buf + "\n"
552
553 def readlines(self, sizehint = -1):
554 """Return a list with all (following) lines. The sizehint parameter
555 is ignored in this implementation.
556 """
557 result = []
558 while True:
559 line = self.readline()
560 if not line: break
561 result.append(line)
562 return result
563
564 def read(self, size = None):
565 # act like file() obj and return empty string if size is 0
566 if size == 0:
567 return ''
568
569 # determine read size
570 bytesToRead = self.compress_size - self.bytes_read
571
572 # adjust read size for encrypted files since the first 12 bytes
573 # are for the encryption/password information
574 if self.decrypter is not None:
575 bytesToRead -= 12
576
577 if size is not None and size >= 0:
578 if self.compress_type == ZIP_STORED:
579 lr = len(self.readbuffer)
580 bytesToRead = min(bytesToRead, size - lr)
581 elif self.compress_type == ZIP_DEFLATED:
582 if len(self.readbuffer) > size:
583 # the user has requested fewer bytes than we've already
584 # pulled through the decompressor; don't read any more
585 bytesToRead = 0
586 else:
587 # user will use up the buffer, so read some more
588 lr = len(self.rawbuffer)
589 bytesToRead = min(bytesToRead, self.compreadsize - lr)
590
591 # avoid reading past end of file contents
592 if bytesToRead + self.bytes_read > self.compress_size:
593 bytesToRead = self.compress_size - self.bytes_read
594
595 # try to read from file (if necessary)
596 if bytesToRead > 0:
597 bytes = self.fileobj.read(bytesToRead)
598 self.bytes_read += len(bytes)
599 self.rawbuffer += bytes
600
601 # handle contents of raw buffer
602 if self.rawbuffer:
603 newdata = self.rawbuffer
604 self.rawbuffer = ''
605
606 # decrypt new data if we were given an object to handle that
607 if newdata and self.decrypter is not None:
608 newdata = ''.join(map(self.decrypter, newdata))
609
610 # decompress newly read data if necessary
611 if newdata and self.compress_type == ZIP_DEFLATED:
612 newdata = self.dc.decompress(newdata)
613 self.rawbuffer = self.dc.unconsumed_tail
614 if self.eof and len(self.rawbuffer) == 0:
Tim Petersea5962f2007-03-12 18:07:52 +0000615 # we're out of raw bytes (both from the file and
616 # the local buffer); flush just to make sure the
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000617 # decompressor is done
618 newdata += self.dc.flush()
619 # prevent decompressor from being used again
620 self.dc = None
621
622 self.readbuffer += newdata
623
624
625 # return what the user asked for
626 if size is None or len(self.readbuffer) <= size:
627 bytes = self.readbuffer
628 self.readbuffer = ''
629 else:
630 bytes = self.readbuffer[:size]
631 self.readbuffer = self.readbuffer[size:]
632
633 return bytes
Tim Petersea5962f2007-03-12 18:07:52 +0000634
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000635
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000636class ZipFile:
Tim Petersa19a1682001-03-29 04:36:09 +0000637 """ Class with methods to open, read, write, close, list zip files.
638
Martin v. Löwis8c436412008-07-03 12:51:14 +0000639 z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=False)
Tim Petersa19a1682001-03-29 04:36:09 +0000640
Fred Drake3d9091e2001-03-26 15:49:24 +0000641 file: Either the path to the file, or a file-like object.
642 If it is a path, the file will be opened and closed by ZipFile.
643 mode: The mode can be either read "r", write "w" or append "a".
644 compression: ZIP_STORED (no compression) or ZIP_DEFLATED (requires zlib).
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000645 allowZip64: if True ZipFile will create files with ZIP64 extensions when
646 needed, otherwise it will raise an exception when this would
647 be necessary.
648
Fred Drake3d9091e2001-03-26 15:49:24 +0000649 """
Fred Drake484d7352000-10-02 21:14:52 +0000650
Fred Drake90eac282001-02-28 05:29:34 +0000651 fp = None # Set here since __del__ checks it
652
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000653 def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=False):
Fred Drake484d7352000-10-02 21:14:52 +0000654 """Open the ZIP file with mode read "r", write "w" or append "a"."""
Georg Brandl4b3ab6f2007-07-12 09:59:22 +0000655 if mode not in ("r", "w", "a"):
656 raise RuntimeError('ZipFile() requires mode "r", "w", or "a"')
657
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000658 if compression == ZIP_STORED:
659 pass
660 elif compression == ZIP_DEFLATED:
661 if not zlib:
662 raise RuntimeError,\
Fred Drake5db246d2000-09-29 20:44:48 +0000663 "Compression requires the (missing) zlib module"
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000664 else:
665 raise RuntimeError, "That compression method is not supported"
Georg Brandl4b3ab6f2007-07-12 09:59:22 +0000666
667 self._allowZip64 = allowZip64
668 self._didModify = False
Tim Peterse1190062001-01-15 03:34:38 +0000669 self.debug = 0 # Level of printing: 0 through 3
670 self.NameToInfo = {} # Find file info given name
671 self.filelist = [] # List of ZipInfo instances for archive
672 self.compression = compression # Method of compression
Raymond Hettinger2ca7c192005-02-16 09:27:49 +0000673 self.mode = key = mode.replace('b', '')[0]
Martin v. Löwisc6d626e2007-02-13 09:49:38 +0000674 self.pwd = None
Martin v. Löwis8c436412008-07-03 12:51:14 +0000675 self.comment = ''
Tim Petersa19a1682001-03-29 04:36:09 +0000676
Fred Drake3d9091e2001-03-26 15:49:24 +0000677 # Check if we were passed a file-like object
Walter Dörwald65230a22002-06-03 15:58:32 +0000678 if isinstance(file, basestring):
Fred Drake3d9091e2001-03-26 15:49:24 +0000679 self._filePassed = 0
680 self.filename = file
681 modeDict = {'r' : 'rb', 'w': 'wb', 'a' : 'r+b'}
Martin v. Löwis84f6de92007-02-13 10:10:39 +0000682 try:
683 self.fp = open(file, modeDict[mode])
684 except IOError:
685 if mode == 'a':
686 mode = key = 'w'
687 self.fp = open(file, modeDict[mode])
688 else:
689 raise
Fred Drake3d9091e2001-03-26 15:49:24 +0000690 else:
691 self._filePassed = 1
692 self.fp = file
693 self.filename = getattr(file, 'name', None)
Tim Petersa19a1682001-03-29 04:36:09 +0000694
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000695 if key == 'r':
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000696 self._GetContents()
697 elif key == 'w':
Fred Drake3d9091e2001-03-26 15:49:24 +0000698 pass
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000699 elif key == 'a':
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000700 try: # See if file is a zip file
701 self._RealGetContents()
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000702 # seek to start of directory and overwrite
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000703 self.fp.seek(self.start_dir, 0)
704 except BadZipfile: # file is not a zip file, just append
705 self.fp.seek(0, 2)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000706 else:
Tim Peters7d3bad62001-04-04 18:56:49 +0000707 if not self._filePassed:
708 self.fp.close()
709 self.fp = None
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000710 raise RuntimeError, 'Mode must be "r", "w" or "a"'
711
def _GetContents(self):
    """Load the table of contents through _RealGetContents().

    If that fails with BadZipfile, a file handle this object opened
    itself (i.e. no file object was passed in) is closed and dropped
    before the error is passed on to the caller.
    """
    try:
        self._RealGetContents()
    except BadZipfile:
        opened_here = not self._filePassed
        if opened_here:
            # We own the handle, so don't leave it dangling.
            self.fp.close()
            self.fp = None
        raise
722
723 def _RealGetContents(self):
Fred Drake484d7352000-10-02 21:14:52 +0000724 """Read in the table of contents for the ZIP file."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000725 fp = self.fp
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000726 endrec = _EndRecData(fp)
727 if not endrec:
728 raise BadZipfile, "File is not a zip file"
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000729 if self.debug > 1:
730 print endrec
Martin v. Löwis8c436412008-07-03 12:51:14 +0000731 size_cd = endrec[_ECD_SIZE] # bytes in central directory
732 offset_cd = endrec[_ECD_OFFSET] # offset of central directory
733 self.comment = endrec[_ECD_COMMENT] # archive comment
734
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000735 # "concat" is zero, unless zip was concatenated to another file
Martin v. Löwis8c436412008-07-03 12:51:14 +0000736 concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
Antoine Pitrouebcd0ce2008-09-05 23:30:23 +0000737 if endrec[_ECD_SIGNATURE] == stringEndArchive64:
738 # If Zip64 extension structures are present, account for them
Martin v. Löwis8c436412008-07-03 12:51:14 +0000739 concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)
740
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000741 if self.debug > 2:
Martin v. Löwis8c436412008-07-03 12:51:14 +0000742 inferred = concat + offset_cd
743 print "given, inferred, offset", offset_cd, inferred, concat
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000744 # self.start_dir: Position of start of central directory
745 self.start_dir = offset_cd + concat
746 fp.seek(self.start_dir, 0)
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000747 data = fp.read(size_cd)
748 fp = cStringIO.StringIO(data)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000749 total = 0
750 while total < size_cd:
Martin v. Löwis8c436412008-07-03 12:51:14 +0000751 centdir = fp.read(sizeCentralDir)
Amaury Forgeot d'Arcae6d2b92008-07-11 21:28:25 +0000752 if centdir[0:4] != stringCentralDir:
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000753 raise BadZipfile, "Bad magic number for central directory"
754 centdir = struct.unpack(structCentralDir, centdir)
755 if self.debug > 2:
756 print centdir
Fred Drake3e038e52001-02-28 17:56:26 +0000757 filename = fp.read(centdir[_CD_FILENAME_LENGTH])
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000758 # Create ZipInfo instance to store file information
759 x = ZipInfo(filename)
Fred Drake3e038e52001-02-28 17:56:26 +0000760 x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
761 x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000762 x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000763 (x.create_version, x.create_system, x.extract_version, x.reserved,
764 x.flag_bits, x.compress_type, t, d,
765 x.CRC, x.compress_size, x.file_size) = centdir[1:12]
766 x.volume, x.internal_attr, x.external_attr = centdir[15:18]
767 # Convert date/time code to (year, month, day, hour, min, sec)
Gregory P. Smith0c63fc22008-01-20 01:21:03 +0000768 x._raw_time = t
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000769 x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
Fred Drake414ca662000-06-13 18:49:53 +0000770 t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000771
772 x._decodeExtra()
773 x.header_offset = x.header_offset + concat
Martin v. Löwis471617d2008-05-05 17:16:58 +0000774 x.filename = x._decodeFilename()
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000775 self.filelist.append(x)
776 self.NameToInfo[x.filename] = x
Martin v. Löwis8c436412008-07-03 12:51:14 +0000777
778 # update total bytes read from central directory
779 total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH]
780 + centdir[_CD_EXTRA_FIELD_LENGTH]
781 + centdir[_CD_COMMENT_LENGTH])
782
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000783 if self.debug > 2:
784 print "total", total
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000785
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000786
def namelist(self):
    """Return a new list with the filename of every entry in
    self.filelist, in the same order."""
    return [entry.filename for entry in self.filelist]
793
def infolist(self):
    """Return the list of ZipInfo instances held for the archive.

    This is self.filelist itself, not a copy.
    """
    entries = self.filelist
    return entries
798
799 def printdir(self):
Fred Drake484d7352000-10-02 21:14:52 +0000800 """Print a table of contents for the zip file."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000801 print "%-46s %19s %12s" % ("File Name", "Modified ", "Size")
802 for zinfo in self.filelist:
Raymond Hettinger351e1a32008-01-14 22:58:05 +0000803 date = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time[:6]
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000804 print "%-46s %s %12d" % (zinfo.filename, date, zinfo.file_size)
805
def testzip(self):
    """Read every member of the archive to verify it.

    Each member is opened and read to the end; reading is what is
    expected to trigger the CRC check.  Returns the name of the first
    member for which BadZipfile is raised, or None if none is.
    """
    block = 2 ** 20
    for entry in self.filelist:
        try:
            # Pull the data through in fixed-size pieces so that very
            # large members cannot cause an OverflowError/MemoryError.
            stream = self.open(entry.filename, "r")
            piece = stream.read(block)
            while piece:
                piece = stream.read(block)
        except BadZipfile:
            return entry.filename
818
def getinfo(self, name):
    """Look up 'name' in self.NameToInfo and return its ZipInfo.

    Raises KeyError if there is no such entry.
    """
    found = self.NameToInfo.get(name)
    if found is not None:
        return found
    raise KeyError('There is no item named %r in the archive' % name)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000827
def setpassword(self, pwd):
    """Remember 'pwd' as the default password; open() falls back to it
    for encrypted members when no password is given explicitly."""
    self.pwd = pwd
831
def read(self, name, pwd=None):
    """Return the complete contents of member 'name' as a string.

    Opens the member in mode "r" (passing 'pwd' along) and reads it
    in one go.
    """
    member = self.open(name, "r", pwd)
    return member.read()
835
def open(self, name, mode="r", pwd=None):
    """Return file-like object for 'name'.

    'name' is a member name or a ZipInfo instance.  'mode' must be
    "r", "U" or "rU"; a mode containing "U" switches universal
    newlines on for the returned object.  'pwd' is the password for an
    encrypted member; if it is empty, the password stored with
    setpassword() is used.

    Raises RuntimeError for an unsupported mode, a closed archive, a
    missing password or a wrong password; BadZipfile if the member's
    local header has a wrong signature, is truncated, or carries a
    file name different from the one in the directory.  An unknown
    member name fails inside getinfo().

    If the archive was opened by file name, a separate file handle is
    opened for the member.  That handle is closed again when this
    method fails, so an error does not leak a file descriptor.
    """
    if mode not in ("r", "U", "rU"):
        raise RuntimeError('open() requires mode "r", "U", or "rU"')
    if not self.fp:
        raise RuntimeError(
              "Attempt to read ZIP archive that was already closed")

    # Only open a new file for instances where we were not
    # given a file object in the constructor
    if self._filePassed:
        zef_file = self.fp
    else:
        zef_file = open(self.filename, 'rb')

    succeeded = False
    try:
        # Make sure we have an info object
        if isinstance(name, ZipInfo):
            # 'name' is already an info object
            zinfo = name
        else:
            # Get info object for name
            zinfo = self.getinfo(name)

        zef_file.seek(zinfo.header_offset, 0)

        # Skip the file header:
        fheader = zef_file.read(sizeFileHeader)
        if fheader[0:4] != stringFileHeader:
            raise BadZipfile("Bad magic number for file header")
        if len(fheader) != sizeFileHeader:
            # Valid signature but incomplete header: report a bad
            # archive rather than failing inside struct.unpack.
            raise BadZipfile("Truncated file header")

        fheader = struct.unpack(structFileHeader, fheader)
        fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
        if fheader[_FH_EXTRA_FIELD_LENGTH]:
            zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])

        if fname != zinfo.orig_filename:
            raise BadZipfile(
                'File name in directory "%s" and header "%s" differ.' % (
                    zinfo.orig_filename, fname))

        # check for encrypted flag & handle password
        is_encrypted = zinfo.flag_bits & 0x1
        zd = None
        if is_encrypted:
            if not pwd:
                pwd = self.pwd
            if not pwd:
                raise RuntimeError("File %s is encrypted, "
                      "password required for extraction" % name)

            zd = _ZipDecrypter(pwd)
            # The first 12 bytes in the cypher stream is an encryption header
            #  used to strengthen the algorithm. The first 11 bytes are
            #  completely random, while the 12th contains the MSB of the CRC,
            #  or the MSB of the file time depending on the header type
            #  and is used to check the correctness of the password.
            header = zef_file.read(12)
            h = map(zd, header[0:12])
            if zinfo.flag_bits & 0x8:
                # compare against the file time from extended local headers
                check_byte = (zinfo._raw_time >> 8) & 0xff
            else:
                # compare against the CRC otherwise
                check_byte = (zinfo.CRC >> 24) & 0xff
            if ord(h[11]) != check_byte:
                raise RuntimeError("Bad password for file", name)

        # build and return a ZipExtFile
        if zd is None:
            zef = ZipExtFile(zef_file, zinfo)
        else:
            zef = ZipExtFile(zef_file, zinfo, zd)

        # set universal newlines on ZipExtFile if necessary
        if "U" in mode:
            zef.set_univ_newlines(True)
        succeeded = True
        return zef
    finally:
        # On any failure, release the handle we opened ourselves; a
        # caller-supplied file object is never closed here.
        if not succeeded and not self._filePassed:
            zef_file.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000913
def extract(self, member, path=None, pwd=None):
    """Extract one member and return the value of _extract_member().

    `member' is either a ZipInfo object or a name that is resolved
    with getinfo().  The member is extracted below `path'; when `path'
    is None the current working directory is used.  `pwd' is handed
    through unchanged.
    """
    info = member
    if not isinstance(info, ZipInfo):
        info = self.getinfo(info)

    destination = path
    if destination is None:
        destination = os.getcwd()

    return self._extract_member(info, destination, pwd)
927
def extractall(self, path=None, members=None, pwd=None):
    """Extract several members by calling extract() on each of them.

    `members' defaults to everything returned by namelist(); `path'
    and `pwd' are passed to extract() unchanged.
    """
    todo = members
    if todo is None:
        todo = self.namelist()

    for item in todo:
        self.extract(item, path, pwd)
939
def _extract_member(self, member, targetpath, pwd):
    """Extract the ZipInfo object 'member' to a physical
       file on the path targetpath.

    Returns the normalised path that was written.  Missing parent
    directories are created.  A member whose name ends in '/' is
    created as a directory and nothing is copied.  Both the member
    stream and the output file are closed even if opening the output
    file or copying the data fails.
    """
    # build the destination pathname, replacing
    # forward slashes to platform specific separators.
    # Strip trailing path separator, unless it represents the root.
    if (targetpath[-1:] in (os.path.sep, os.path.altsep)
        and len(os.path.splitdrive(targetpath)[1]) > 1):
        targetpath = targetpath[:-1]

    # don't include leading "/" from file name if present
    if member.filename[0] == '/':
        targetpath = os.path.join(targetpath, member.filename[1:])
    else:
        targetpath = os.path.join(targetpath, member.filename)

    # NOTE(review): the member name is used as stored in the archive; a
    # name containing ".." components can normalise to a location
    # outside the requested directory.  Callers extracting untrusted
    # archives should validate names first.
    targetpath = os.path.normpath(targetpath)

    # Create all upper directories if necessary.
    upperdirs = os.path.dirname(targetpath)
    if upperdirs and not os.path.exists(upperdirs):
        os.makedirs(upperdirs)

    if member.filename[-1] == '/':
        if not os.path.isdir(targetpath):
            os.mkdir(targetpath)
        return targetpath

    source = self.open(member, pwd=pwd)
    try:
        target = open(targetpath, "wb")
        try:
            shutil.copyfileobj(source, target)
        finally:
            target.close()
    finally:
        source.close()

    return targetpath
976
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000977 def _writecheck(self, zinfo):
Fred Drake484d7352000-10-02 21:14:52 +0000978 """Check for errors before writing a file to the archive."""
Raymond Hettinger54f02222002-06-01 14:18:47 +0000979 if zinfo.filename in self.NameToInfo:
Tim Peterse1190062001-01-15 03:34:38 +0000980 if self.debug: # Warning for duplicate names
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000981 print "Duplicate name:", zinfo.filename
982 if self.mode not in ("w", "a"):
983 raise RuntimeError, 'write() requires mode "w" or "a"'
984 if not self.fp:
985 raise RuntimeError, \
Fred Drake5db246d2000-09-29 20:44:48 +0000986 "Attempt to write ZIP archive that was already closed"
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000987 if zinfo.compress_type == ZIP_DEFLATED and not zlib:
988 raise RuntimeError, \
Fred Drake5db246d2000-09-29 20:44:48 +0000989 "Compression requires the (missing) zlib module"
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000990 if zinfo.compress_type not in (ZIP_STORED, ZIP_DEFLATED):
991 raise RuntimeError, \
Fred Drake5db246d2000-09-29 20:44:48 +0000992 "That compression method is not supported"
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000993 if zinfo.file_size > ZIP64_LIMIT:
994 if not self._allowZip64:
995 raise LargeZipFile("Filesize would require ZIP64 extensions")
996 if zinfo.header_offset > ZIP64_LIMIT:
997 if not self._allowZip64:
998 raise LargeZipFile("Zipfile size would require ZIP64 extensions")
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000999
def write(self, filename, arcname=None, compress_type=None):
    """Put the bytes from filename into the archive under the name
    arcname.

    'arcname' defaults to 'filename'; its drive part and leading path
    separators are removed.  'compress_type' overrides the archive's
    default compression for this member.  A directory is stored as an
    entry whose name ends in '/' with zero sizes and CRC.

    For a regular file the local header is first written with zero
    CRC and sizes; after the data has been streamed, those three
    fields are patched in place.  The input file is closed even when
    reading, compressing or writing fails.

    Raises RuntimeError if the archive is already closed, plus
    whatever _writecheck() raises.
    """
    if not self.fp:
        raise RuntimeError(
              "Attempt to write to ZIP archive that was already closed")

    st = os.stat(filename)
    isdir = stat.S_ISDIR(st.st_mode)
    mtime = time.localtime(st.st_mtime)
    date_time = mtime[0:6]
    # Create ZipInfo instance to store file information
    if arcname is None:
        arcname = filename
    arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
    while arcname[0] in (os.sep, os.altsep):
        arcname = arcname[1:]
    if isdir:
        arcname += '/'
    zinfo = ZipInfo(arcname, date_time)
    zinfo.external_attr = (st[0] & 0xFFFF) << 16      # Unix attributes
    if compress_type is None:
        zinfo.compress_type = self.compression
    else:
        zinfo.compress_type = compress_type

    zinfo.file_size = st.st_size
    zinfo.flag_bits = 0x00
    zinfo.header_offset = self.fp.tell()    # Start of header bytes

    self._writecheck(zinfo)
    self._didModify = True

    if isdir:
        zinfo.file_size = 0
        zinfo.compress_size = 0
        zinfo.CRC = 0
        self.filelist.append(zinfo)
        self.NameToInfo[zinfo.filename] = zinfo
        self.fp.write(zinfo.FileHeader())
        return

    fp = open(filename, "rb")
    try:
        # Must overwrite CRC and sizes with correct data later
        zinfo.CRC = CRC = 0
        zinfo.compress_size = compress_size = 0
        zinfo.file_size = file_size = 0
        self.fp.write(zinfo.FileHeader())
        if zinfo.compress_type == ZIP_DEFLATED:
            cmpr = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                 zlib.DEFLATED, -15)
        else:
            cmpr = None
        while 1:
            buf = fp.read(1024 * 8)
            if not buf:
                break
            file_size = file_size + len(buf)
            CRC = crc32(buf, CRC) & 0xffffffff
            if cmpr:
                buf = cmpr.compress(buf)
                compress_size = compress_size + len(buf)
            self.fp.write(buf)
    finally:
        # Release the input file whether or not the copy succeeded.
        fp.close()
    if cmpr:
        buf = cmpr.flush()
        compress_size = compress_size + len(buf)
        self.fp.write(buf)
        zinfo.compress_size = compress_size
    else:
        zinfo.compress_size = file_size
    zinfo.CRC = CRC
    zinfo.file_size = file_size
    # Seek backwards and write CRC and file sizes
    position = self.fp.tell()       # Preserve current position in file
    # The three fields are written 14 bytes into the local header.
    self.fp.seek(zinfo.header_offset + 14, 0)
    self.fp.write(struct.pack("<LLL", zinfo.CRC, zinfo.compress_size,
          zinfo.file_size))
    self.fp.seek(position, 0)
    self.filelist.append(zinfo)
    self.NameToInfo[zinfo.filename] = zinfo
1081
Just van Rossumb083cb32002-12-12 12:23:32 +00001082 def writestr(self, zinfo_or_arcname, bytes):
Fred Drake484d7352000-10-02 21:14:52 +00001083 """Write a file into the archive. The contents is the string
Just van Rossumb083cb32002-12-12 12:23:32 +00001084 'bytes'. 'zinfo_or_arcname' is either a ZipInfo instance or
1085 the name of the file in the archive."""
1086 if not isinstance(zinfo_or_arcname, ZipInfo):
1087 zinfo = ZipInfo(filename=zinfo_or_arcname,
Raymond Hettinger351e1a32008-01-14 22:58:05 +00001088 date_time=time.localtime(time.time())[:6])
Just van Rossumb083cb32002-12-12 12:23:32 +00001089 zinfo.compress_type = self.compression
Antoine Pitrou5fdfa3e2008-07-25 19:42:26 +00001090 zinfo.external_attr = 0600 << 16
Just van Rossumb083cb32002-12-12 12:23:32 +00001091 else:
1092 zinfo = zinfo_or_arcname
Georg Brandl4b3ab6f2007-07-12 09:59:22 +00001093
1094 if not self.fp:
1095 raise RuntimeError(
1096 "Attempt to write to ZIP archive that was already closed")
1097
Tim Peterse1190062001-01-15 03:34:38 +00001098 zinfo.file_size = len(bytes) # Uncompressed size
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001099 zinfo.header_offset = self.fp.tell() # Start of header bytes
1100 self._writecheck(zinfo)
1101 self._didModify = True
Gregory P. Smithbf02e3b2008-03-19 03:14:41 +00001102 zinfo.CRC = crc32(bytes) & 0xffffffff # CRC-32 checksum
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001103 if zinfo.compress_type == ZIP_DEFLATED:
1104 co = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
1105 zlib.DEFLATED, -15)
1106 bytes = co.compress(bytes) + co.flush()
Tim Peterse1190062001-01-15 03:34:38 +00001107 zinfo.compress_size = len(bytes) # Compressed size
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001108 else:
1109 zinfo.compress_size = zinfo.file_size
Tim Peterse1190062001-01-15 03:34:38 +00001110 zinfo.header_offset = self.fp.tell() # Start of header bytes
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001111 self.fp.write(zinfo.FileHeader())
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001112 self.fp.write(bytes)
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001113 self.fp.flush()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001114 if zinfo.flag_bits & 0x08:
Tim Peterse1190062001-01-15 03:34:38 +00001115 # Write CRC and file sizes after the file data
Gregory P. Smithe1e67642009-06-26 08:19:19 +00001116 self.fp.write(struct.pack("<LLL", zinfo.CRC, zinfo.compress_size,
Tim Peterse1190062001-01-15 03:34:38 +00001117 zinfo.file_size))
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001118 self.filelist.append(zinfo)
1119 self.NameToInfo[zinfo.filename] = zinfo
1120
def __del__(self):
    """Finalizer: run close() in case the user never did."""
    self.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001124
def close(self):
    """Close the file, and for mode "w" and "a" write the ending
    records.

    Does nothing if the archive is already closed.  The central
    directory and end-of-archive records are only written when the
    archive was opened for writing/appending and something was
    actually added.  ZIP64 records are added when the entry count,
    directory size or directory offset exceed the classic limits.

    Whatever happens while the records are written, self.fp is reset
    to None and a file opened by this object is closed, so a failure
    here cannot leave the handle open or make a later close() (for
    example from __del__) fail a second time.
    """
    if self.fp is None:
        return

    try:
        if self.mode in ("w", "a") and self._didModify: # write ending records
            count = 0
            pos1 = self.fp.tell()
            for zinfo in self.filelist:         # write central directory
                count = count + 1
                dt = zinfo.date_time
                dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
                dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
                extra = []
                if zinfo.file_size > ZIP64_LIMIT \
                        or zinfo.compress_size > ZIP64_LIMIT:
                    extra.append(zinfo.file_size)
                    extra.append(zinfo.compress_size)
                    file_size = 0xffffffff
                    compress_size = 0xffffffff
                else:
                    file_size = zinfo.file_size
                    compress_size = zinfo.compress_size

                if zinfo.header_offset > ZIP64_LIMIT:
                    extra.append(zinfo.header_offset)
                    header_offset = 0xffffffff
                else:
                    header_offset = zinfo.header_offset

                extra_data = zinfo.extra
                if extra:
                    # Append a ZIP64 field to the extra's
                    extra_data = struct.pack(
                            '<HH' + 'Q'*len(extra),
                            1, 8*len(extra), *extra) + extra_data

                    extract_version = max(45, zinfo.extract_version)
                    create_version = max(45, zinfo.create_version)
                else:
                    extract_version = zinfo.extract_version
                    create_version = zinfo.create_version

                try:
                    filename, flag_bits = zinfo._encodeFilenameFlags()
                    centdir = struct.pack(structCentralDir,
                     stringCentralDir, create_version,
                     zinfo.create_system, extract_version, zinfo.reserved,
                     flag_bits, zinfo.compress_type, dostime, dosdate,
                     zinfo.CRC, compress_size, file_size,
                     len(filename), len(extra_data), len(zinfo.comment),
                     0, zinfo.internal_attr, zinfo.external_attr,
                     header_offset)
                except DeprecationWarning:
                    # Dump the values that could not be packed, then
                    # let the warning-turned-error propagate.
                    print >>sys.stderr, (structCentralDir,
                     stringCentralDir, create_version,
                     zinfo.create_system, extract_version, zinfo.reserved,
                     zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
                     zinfo.CRC, compress_size, file_size,
                     len(zinfo.filename), len(extra_data), len(zinfo.comment),
                     0, zinfo.internal_attr, zinfo.external_attr,
                     header_offset)
                    raise
                self.fp.write(centdir)
                self.fp.write(filename)
                self.fp.write(extra_data)
                self.fp.write(zinfo.comment)

            pos2 = self.fp.tell()
            # Write end-of-zip-archive record
            centDirCount = count
            centDirSize = pos2 - pos1
            centDirOffset = pos1
            if (centDirCount >= ZIP_FILECOUNT_LIMIT or
                centDirOffset > ZIP64_LIMIT or
                centDirSize > ZIP64_LIMIT):
                # Need to write the ZIP64 end-of-archive records
                zip64endrec = struct.pack(
                        structEndArchive64, stringEndArchive64,
                        44, 45, 45, 0, 0, centDirCount, centDirCount,
                        centDirSize, centDirOffset)
                self.fp.write(zip64endrec)

                zip64locrec = struct.pack(
                        structEndArchive64Locator,
                        stringEndArchive64Locator, 0, pos2, 1)
                self.fp.write(zip64locrec)
                # The classic end record can only hold capped values
                centDirCount = min(centDirCount, 0xFFFF)
                centDirSize = min(centDirSize, 0xFFFFFFFF)
                centDirOffset = min(centDirOffset, 0xFFFFFFFF)

            # check for valid comment length
            if len(self.comment) > ZIP_MAX_COMMENT:
                if self.debug > 0:
                    msg = 'Archive comment is too long; truncating to %d bytes' \
                          % ZIP_MAX_COMMENT
                    print(msg)
                self.comment = self.comment[:ZIP_MAX_COMMENT]

            endrec = struct.pack(structEndArchive, stringEndArchive,
                                 0, 0, centDirCount, centDirCount,
                                 centDirSize, centDirOffset, len(self.comment))
            self.fp.write(endrec)
            self.fp.write(self.comment)
            self.fp.flush()
    finally:
        # Detach the handle first so the object is marked closed even
        # if closing the file itself raises.
        fp = self.fp
        self.fp = None
        if not self._filePassed:
            fp.close()
1234
1235
1236class PyZipFile(ZipFile):
Fred Drake484d7352000-10-02 21:14:52 +00001237 """Class to create ZIP archives with Python library files and packages."""
1238
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001239 def writepy(self, pathname, basename = ""):
1240 """Add all files from "pathname" to the ZIP archive.
1241
Fred Drake484d7352000-10-02 21:14:52 +00001242 If pathname is a package directory, search the directory and
1243 all package subdirectories recursively for all *.py and enter
1244 the modules into the archive. If pathname is a plain
1245 directory, listdir *.py and enter all modules. Else, pathname
1246 must be a Python *.py file and the module will be put into the
1247 archive. Added modules are always module.pyo or module.pyc.
1248 This method will compile the module.py into module.pyc if
1249 necessary.
1250 """
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001251 dir, name = os.path.split(pathname)
1252 if os.path.isdir(pathname):
1253 initname = os.path.join(pathname, "__init__.py")
1254 if os.path.isfile(initname):
1255 # This is a package directory, add it
1256 if basename:
1257 basename = "%s/%s" % (basename, name)
1258 else:
1259 basename = name
1260 if self.debug:
1261 print "Adding package in", pathname, "as", basename
1262 fname, arcname = self._get_codename(initname[0:-3], basename)
1263 if self.debug:
1264 print "Adding", arcname
1265 self.write(fname, arcname)
1266 dirlist = os.listdir(pathname)
1267 dirlist.remove("__init__.py")
1268 # Add all *.py files and package subdirectories
1269 for filename in dirlist:
1270 path = os.path.join(pathname, filename)
1271 root, ext = os.path.splitext(filename)
1272 if os.path.isdir(path):
1273 if os.path.isfile(os.path.join(path, "__init__.py")):
1274 # This is a package directory, add it
1275 self.writepy(path, basename) # Recursive call
1276 elif ext == ".py":
1277 fname, arcname = self._get_codename(path[0:-3],
1278 basename)
1279 if self.debug:
1280 print "Adding", arcname
1281 self.write(fname, arcname)
1282 else:
1283 # This is NOT a package directory, add its files at top level
1284 if self.debug:
1285 print "Adding files from directory", pathname
1286 for filename in os.listdir(pathname):
1287 path = os.path.join(pathname, filename)
1288 root, ext = os.path.splitext(filename)
1289 if ext == ".py":
1290 fname, arcname = self._get_codename(path[0:-3],
1291 basename)
1292 if self.debug:
1293 print "Adding", arcname
1294 self.write(fname, arcname)
1295 else:
1296 if pathname[-3:] != ".py":
1297 raise RuntimeError, \
Fred Drake5db246d2000-09-29 20:44:48 +00001298 'Files added with writepy() must end with ".py"'
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001299 fname, arcname = self._get_codename(pathname[0:-3], basename)
1300 if self.debug:
1301 print "Adding file", arcname
1302 self.write(fname, arcname)
1303
def _get_codename(self, pathname, basename):
    """Return (filename, archivename) for the path.

    Given a module name path (a path without the ".py" suffix), return
    the path of the file to store and the name to store it under,
    compiling the source if necessary.  For example, given
    /python/lib/string, return (/python/lib/string.pyc, string.pyc).

    An existing .pyo file that is at least as new as the source is
    preferred, then an equally fresh .pyc file.  Otherwise the source is
    byte-compiled to the .pyc path.  If that compilation fails, the
    compiler's message is printed and the .py source itself is returned,
    so the caller is never handed a path to missing or stale bytecode.

    If basename is non-empty it is prepended to the archive name as a
    directory ("basename/name.pyc").
    """
    file_py = pathname + ".py"
    file_pyc = pathname + ".pyc"
    file_pyo = pathname + ".pyo"

    def is_current(compiled):
        # Bytecode is usable only if it exists and is not older than the
        # source.  The source is stat'ed lazily, exactly when a compiled
        # file is present to compare against.
        return (os.path.isfile(compiled) and
                os.stat(compiled).st_mtime >= os.stat(file_py).st_mtime)

    if is_current(file_pyo):
        fname = file_pyo    # Use .pyo file
    elif is_current(file_pyc):
        fname = file_pyc    # Use .pyc file
    else:
        # No usable bytecode: (re)compile the source to the .pyc path.
        import py_compile
        if self.debug:
            print("Compiling %s" % (file_py,))
        try:
            # doraise=True so that a broken source raises instead of
            # only writing the error to stderr.
            py_compile.compile(file_py, file_pyc, None, True)
        except py_compile.PyCompileError as err:
            print(err.msg)
            # Nothing valid was compiled; archive the source instead of
            # a missing or out-of-date .pyc.
            fname = file_py
        else:
            fname = file_pyc

    archivename = os.path.split(fname)[1]
    if basename:
        archivename = "%s/%s" % (basename, archivename)
    return (fname, archivename)
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001333
1334
def main(args=None):
    """Run the module's command-line interface.

    args is the list of command-line arguments without the program name;
    when it is None, sys.argv[1:] is used.  The first argument selects
    the action:

        -l zipfile.zip           print a listing of the archive
        -t zipfile.zip           test the archive, reporting a corrupt member
        -e zipfile.zip target    extract the archive into directory target
        -c zipfile.zip src ...   create an archive from files/directories

    An unknown action or a wrong number of arguments prints the usage
    text and exits with status 1.
    """
    import textwrap
    USAGE = textwrap.dedent("""\
        Usage:
            zipfile.py -l zipfile.zip        # Show listing of a zipfile
            zipfile.py -t zipfile.zip        # Test if a zipfile is valid
            zipfile.py -e zipfile.zip target # Extract zipfile into target dir
            zipfile.py -c zipfile.zip src ... # Create zipfile from sources
        """)
    if args is None:
        args = sys.argv[1:]

    def usage_error():
        # Every malformed invocation is reported the same way.
        print(USAGE)
        sys.exit(1)

    if not args or args[0] not in ('-l', '-c', '-e', '-t'):
        usage_error()

    action = args[0]

    if action == '-l':
        if len(args) != 2:
            usage_error()
        zf = ZipFile(args[1], 'r')
        try:
            zf.printdir()
        finally:
            zf.close()

    elif action == '-t':
        if len(args) != 2:
            usage_error()
        zf = ZipFile(args[1], 'r')
        try:
            # The result is the name of the first member that fails its
            # check (false when none does), so it must not be discarded.
            badfile = zf.testzip()
        finally:
            zf.close()
        if badfile:
            print("The following enclosed file is corrupted: %r" % (badfile,))
        print("Done testing")

    elif action == '-e':
        if len(args) != 3:
            usage_error()
        out = args[2]

        def member_parts(member):
            # Member names come from the archive and are untrusted: drop
            # drive letters, empty components (absolute paths), "." and
            # ".." so that nothing can be written outside of `out`.
            name = member.replace('/', os.path.sep)
            if os.path.altsep:
                name = name.replace(os.path.altsep, os.path.sep)
            name = os.path.splitdrive(name)[1]
            return [part for part in name.split(os.path.sep)
                    if part not in ('', os.path.curdir, os.path.pardir)]

        zf = ZipFile(args[1], 'r')
        try:
            for path in zf.namelist():
                parts = member_parts(path)
                if not parts:
                    # Names such as "/" or "./" denote the target itself.
                    continue
                tgt = os.path.join(out, *parts)
                is_dir = path.endswith('/')

                if is_dir:
                    tgtdir = tgt
                else:
                    tgtdir = os.path.dirname(tgt)
                # tgtdir is empty when extracting a top-level member
                # into the current directory (out == '').
                if tgtdir and not os.path.exists(tgtdir):
                    os.makedirs(tgtdir)
                if is_dir:
                    # Directory entries carry no data to write.
                    continue

                fp = open(tgt, 'wb')
                try:
                    fp.write(zf.read(path))
                finally:
                    fp.close()
        finally:
            zf.close()

    elif action == '-c':
        if len(args) < 3:
            usage_error()

        def addToZip(zf, path, zippath):
            # Add a file, or recursively the contents of a directory.
            if os.path.isfile(path):
                zf.write(path, zippath, ZIP_DEFLATED)
            elif os.path.isdir(path):
                # Sorted so that the archive layout is reproducible.
                for nm in sorted(os.listdir(path)):
                    addToZip(zf,
                             os.path.join(path, nm), os.path.join(zippath, nm))
            # else: ignore

        zf = ZipFile(args[1], 'w', allowZip64=True)
        try:
            for src in args[2:]:
                zippath = os.path.basename(src)
                if not zippath:
                    # "dir/" has an empty basename; use the directory's
                    # own name so its contents stay grouped under it.
                    zippath = os.path.basename(os.path.dirname(src))
                if zippath in (os.curdir, os.pardir):
                    zippath = ''
                addToZip(zf, src, zippath)
        finally:
            zf.close()
1407
# Script entry point: run the command-line interface only when this file is
# executed directly, not when it is imported as a module.
if __name__ == "__main__":
    main()