# blob: 6ee9923e7963ea939c4052f84679ee964f1619bb  [file] [log] [blame]
# (header left over from the repository web view this file was copied from)
"""
Read and write ZIP files.
"""
import binascii
import cStringIO
import os
import shutil
import stat
import struct
import sys
import time

# zlib is optional: ZipFile() checks this name for None before it accepts
# ZIP_DEFLATED.  binascii supplies a CRC-32 implementation when zlib is absent.
try:
    import zlib  # We may need its compression method
    crc32 = zlib.crc32
except ImportError:
    zlib = None
    crc32 = binascii.crc32

# Names exported by "from zipfile import *".
__all__ = ["BadZipfile", "error", "ZIP_STORED", "ZIP_DEFLATED", "is_zipfile",
           "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile"]

class BadZipfile(Exception):
    """Raised when a file is not a ZIP archive or uses an unsupported layout
    (for example an archive that spans multiple disks)."""


class LargeZipFile(Exception):
    """
    Raised when writing a zipfile, the zipfile requires ZIP64 extensions
    and those extensions are disabled.
    """


error = BadZipfile      # The exception raised by this module

ZIP64_LIMIT = (1 << 31) - 1
ZIP_FILECOUNT_LIMIT = 1 << 16
ZIP_MAX_COMMENT = (1 << 16) - 1

# constants for Zip file compression methods
ZIP_STORED = 0
ZIP_DEFLATED = 8
# Other ZIP compression methods not supported

# Below are some formats and associated data for reading/writing headers using
# the struct module.  The names and structures of headers/records are those used
# in the PKWARE description of the ZIP file format:
#     http://www.pkware.com/documents/casestudies/APPNOTE.TXT
# (URL valid as of January 2008)

# The "end of central directory" structure, magic number, size, and indices
# (section V.I in the format document)
structEndArchive = "<4s4H2LH"
stringEndArchive = "PK\005\006"
sizeEndCentDir = struct.calcsize(structEndArchive)      # 22 bytes

_ECD_SIGNATURE = 0
_ECD_DISK_NUMBER = 1
_ECD_DISK_START = 2
_ECD_ENTRIES_THIS_DISK = 3
_ECD_ENTRIES_TOTAL = 4
_ECD_SIZE = 5
_ECD_OFFSET = 6
_ECD_COMMENT_SIZE = 7
# These last two indices are not part of the structure as defined in the
# spec, but they are used internally by this module as a convenience
_ECD_COMMENT = 8
_ECD_LOCATION = 9

# The "central directory" structure, magic number, size, and indices
# of entries in the structure (section V.F in the format document)
structCentralDir = "<4s4B4HL2L5H2L"
stringCentralDir = "PK\001\002"
sizeCentralDir = struct.calcsize(structCentralDir)      # 46 bytes

# indexes of entries in the central directory structure
_CD_SIGNATURE = 0
_CD_CREATE_VERSION = 1
_CD_CREATE_SYSTEM = 2
_CD_EXTRACT_VERSION = 3
_CD_EXTRACT_SYSTEM = 4
_CD_FLAG_BITS = 5
_CD_COMPRESS_TYPE = 6
_CD_TIME = 7
_CD_DATE = 8
_CD_CRC = 9
_CD_COMPRESSED_SIZE = 10
_CD_UNCOMPRESSED_SIZE = 11
_CD_FILENAME_LENGTH = 12
_CD_EXTRA_FIELD_LENGTH = 13
_CD_COMMENT_LENGTH = 14
_CD_DISK_NUMBER_START = 15
_CD_INTERNAL_FILE_ATTRIBUTES = 16
_CD_EXTERNAL_FILE_ATTRIBUTES = 17
_CD_LOCAL_HEADER_OFFSET = 18

# The "local file header" structure, magic number, size, and indices
# (section V.A in the format document)
structFileHeader = "<4s2B4HL2L2H"
stringFileHeader = "PK\003\004"
sizeFileHeader = struct.calcsize(structFileHeader)      # 30 bytes

_FH_SIGNATURE = 0
_FH_EXTRACT_VERSION = 1
_FH_EXTRACT_SYSTEM = 2
_FH_GENERAL_PURPOSE_FLAG_BITS = 3
_FH_COMPRESSION_METHOD = 4
_FH_LAST_MOD_TIME = 5
_FH_LAST_MOD_DATE = 6
_FH_CRC = 7
_FH_COMPRESSED_SIZE = 8
_FH_UNCOMPRESSED_SIZE = 9
_FH_FILENAME_LENGTH = 10
_FH_EXTRA_FIELD_LENGTH = 11

# The "Zip64 end of central directory locator" structure, magic number, and size
structEndArchive64Locator = "<4sLQL"
stringEndArchive64Locator = "PK\x06\x07"
sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator)  # 20 bytes

# The "Zip64 end of central directory" record, magic number, size, and indices
# (section V.G in the format document)
structEndArchive64 = "<4sQ2H2L4Q"
stringEndArchive64 = "PK\x06\x06"
sizeEndCentDir64 = struct.calcsize(structEndArchive64)  # 56 bytes

_CD64_SIGNATURE = 0
_CD64_DIRECTORY_RECSIZE = 1
_CD64_CREATE_VERSION = 2
_CD64_EXTRACT_VERSION = 3
_CD64_DISK_NUMBER = 4
_CD64_DISK_NUMBER_START = 5
_CD64_NUMBER_ENTRIES_THIS_DISK = 6
_CD64_NUMBER_ENTRIES_TOTAL = 7
_CD64_DIRECTORY_SIZE = 8
_CD64_OFFSET_START_CENTDIR = 9

def _check_zipfile(fp):
    """Return True if the open file object *fp* looks like a ZIP archive.

    The check passes when _EndRecData() returns a non-empty "end of central
    directory" record.  An IOError raised while seeking or reading is
    treated as "not a ZIP file" rather than propagated.
    """
    try:
        if _EndRecData(fp):
            return True         # file has correct magic number
    except IOError:
        pass
    return False

def is_zipfile(filename):
    """Quickly see if a file is a ZIP file by checking the magic number.

    The filename argument may be a file or file-like object too.  Anything
    with a "read" attribute is inspected directly and left open; any other
    value is opened as a path in binary mode and closed again.  Returns
    False when the file cannot be opened or read (IOError).
    """
    try:
        if hasattr(filename, "read"):
            return _check_zipfile(fp=filename)
        with open(filename, "rb") as fp:
            return _check_zipfile(fp)
    except IOError:
        return False

def _EndRecData64(fpin, offset, endrec):
    """
    Read the ZIP64 end-of-archive records and use that to update endrec

    fpin   -- seekable file object positioned anywhere
    offset -- position of the classic "end of central directory" record,
              expressed relative to the end of the file (zero or negative)
    endrec -- list built by _EndRecData(); updated in place and returned

    endrec is returned unchanged when no ZIP64 locator/record precedes the
    classic record.  Raises BadZipfile for archives spanning several disks.
    """
    try:
        fpin.seek(offset - sizeEndCentDir64Locator, 2)
    except IOError:
        # If the seek fails, the file is not large enough to contain a ZIP64
        # end-of-archive record, so just return the end record we were given.
        return endrec

    data = fpin.read(sizeEndCentDir64Locator)
    if len(data) != sizeEndCentDir64Locator:
        # Short read: there is no room for a locator in front of the record.
        return endrec
    sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, data)
    if sig != stringEndArchive64Locator:
        return endrec

    if diskno != 0 or disks != 1:
        raise BadZipfile("zipfiles that span multiple disks are not supported")

    # Assume no 'zip64 extensible data'
    fpin.seek(offset - sizeEndCentDir64Locator - sizeEndCentDir64, 2)
    data = fpin.read(sizeEndCentDir64)
    if len(data) != sizeEndCentDir64:
        return endrec
    sig, sz, create_version, read_version, disk_num, disk_dir, \
            dircount, dircount2, dirsize, diroffset = \
            struct.unpack(structEndArchive64, data)
    if sig != stringEndArchive64:
        return endrec

    # Update the original endrec using data from the ZIP64 record
    endrec[_ECD_SIGNATURE] = sig
    endrec[_ECD_DISK_NUMBER] = disk_num
    endrec[_ECD_DISK_START] = disk_dir
    endrec[_ECD_ENTRIES_THIS_DISK] = dircount
    endrec[_ECD_ENTRIES_TOTAL] = dircount2
    endrec[_ECD_SIZE] = dirsize
    endrec[_ECD_OFFSET] = diroffset
    return endrec


def _EndRecData(fpin):
    """Return data from the "End of Central Directory" record, or None.

    The data is a list of the nine items in the ZIP "End of central dir"
    record (the eight unpacked fields plus the archive comment) followed by
    a tenth item, the file seek offset of this record.  None is returned
    when no valid record is found, including when the file is too short to
    hold one."""

    # Determine file size
    fpin.seek(0, 2)
    filesize = fpin.tell()

    # Check to see if this is ZIP file with no archive comment (the
    # "end of central directory" structure should be the last item in the
    # file if this is the case).
    try:
        fpin.seek(-sizeEndCentDir, 2)
    except IOError:
        # The file is shorter than the record itself: not a ZIP file.
        return None
    data = fpin.read()
    if (len(data) == sizeEndCentDir and
            data[0:4] == stringEndArchive and
            data[-2:] == "\000\000"):
        # the signature is correct and there's no comment, unpack structure
        endrec = list(struct.unpack(structEndArchive, data))

        # Append a blank comment and record start offset
        endrec.append("")
        endrec.append(filesize - sizeEndCentDir)

        # Try to read the "Zip64 end of central directory" structure
        return _EndRecData64(fpin, -sizeEndCentDir, endrec)

    # Either this is not a ZIP file, or it is a ZIP file with an archive
    # comment.  Search the end of the file for the "end of central directory"
    # record signature. The comment is the last item in the ZIP file and may be
    # up to 64K long.  It is assumed that the "end of central directory" magic
    # number does not appear in the comment.
    maxCommentStart = max(filesize - (1 << 16) - sizeEndCentDir, 0)
    fpin.seek(maxCommentStart, 0)
    data = fpin.read()
    start = data.rfind(stringEndArchive)
    if start >= 0:
        # found the magic number; attempt to unpack and interpret
        recData = data[start:start+sizeEndCentDir]
        if len(recData) != sizeEndCentDir:
            # Signature sits too close to the end of the file to be followed
            # by a complete record.
            return None
        endrec = list(struct.unpack(structEndArchive, recData))
        comment = data[start+sizeEndCentDir:]
        # check that comment length is correct
        if endrec[_ECD_COMMENT_SIZE] == len(comment):
            # Append the archive comment and start offset
            endrec.append(comment)
            endrec.append(maxCommentStart + start)

            # Try to read the "Zip64 end of central directory" structure
            return _EndRecData64(fpin, maxCommentStart + start - filesize,
                                 endrec)

    # Unable to find a valid end of central directory structure
    return None


class ZipInfo (object):
    """Class with attributes describing each file in the ZIP archive."""

    __slots__ = (
            'orig_filename',
            'filename',
            'date_time',
            'compress_type',
            'comment',
            'extra',
            'create_system',
            'create_version',
            'extract_version',
            'reserved',
            'flag_bits',
            'volume',
            'internal_attr',
            'external_attr',
            'header_offset',
            'CRC',
            'compress_size',
            'file_size',
            '_raw_time',
        )

    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
        """Create an entry for *filename* last modified at *date_time*.

        date_time is a (year, month, day, hour, min, sec) tuple.
        """
        self.orig_filename = filename   # Original file name in archive

        # Terminate the file name at the first null byte.  Null bytes in file
        # names are used as tricks by viruses in archives.
        null_byte = filename.find(chr(0))
        if null_byte >= 0:
            filename = filename[0:null_byte]
        # This is used to ensure paths in generated ZIP files always use
        # forward slashes as the directory separator, as required by the
        # ZIP format specification.
        if os.sep != "/" and os.sep in filename:
            filename = filename.replace(os.sep, "/")

        self.filename = filename        # Normalized file name
        self.date_time = date_time      # year, month, day, hour, min, sec
        # Standard values:
        self.compress_type = ZIP_STORED # Type of compression for the file
        self.comment = ""               # Comment for each file
        self.extra = ""                 # ZIP extra data
        if sys.platform == 'win32':
            self.create_system = 0          # System which created ZIP archive
        else:
            # Assume everything else is unix-y
            self.create_system = 3          # System which created ZIP archive
        self.create_version = 20        # Version which created ZIP archive
        self.extract_version = 20       # Version needed to extract archive
        self.reserved = 0               # Must be zero
        self.flag_bits = 0              # ZIP flag bits
        self.volume = 0                 # Volume number of file header
        self.internal_attr = 0          # Internal attributes
        self.external_attr = 0          # External file attributes
        # Other attributes are set by class ZipFile:
        # header_offset         Byte offset to the file header
        # CRC                   CRC-32 of the uncompressed file
        # compress_size         Size of the compressed file
        # file_size             Size of the uncompressed file

    def FileHeader(self):
        """Return the per-file header as a string.

        Side effect: when either size exceeds ZIP64_LIMIT, extract_version
        and create_version are each raised to at least 45.
        """
        dt = self.date_time
        # Pack the date and time into two 16-bit fields:
        # (year - 1980) << 9 | month << 5 | day, and
        # hour << 11 | minute << 5 | seconds // 2.
        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
        if self.flag_bits & 0x08:
            # Set these to zero because we write them after the file data
            CRC = compress_size = file_size = 0
        else:
            CRC = self.CRC
            compress_size = self.compress_size
            file_size = self.file_size

        extra = self.extra

        if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
            # File is larger than what fits into a 4 byte integer,
            # fall back to the ZIP64 extension
            fmt = '<HHQQ'
            extra = extra + struct.pack(fmt,
                    1, struct.calcsize(fmt)-4, file_size, compress_size)
            file_size = 0xffffffff
            compress_size = 0xffffffff
            self.extract_version = max(45, self.extract_version)
            # Raise create_version from its own value so that a higher
            # create_version is never lowered to extract_version.
            self.create_version = max(45, self.create_version)

        filename, flag_bits = self._encodeFilenameFlags()
        header = struct.pack(structFileHeader, stringFileHeader,
                 self.extract_version, self.reserved, flag_bits,
                 self.compress_type, dostime, dosdate, CRC,
                 compress_size, file_size,
                 len(filename), len(extra))
        return header + filename + extra

    def _encodeFilenameFlags(self):
        """Return (encoded filename, flag bits) for writing a header.

        A unicode name is stored as ASCII when possible; otherwise as UTF-8
        with flag bit 0x800 set.  Byte-string names pass through unchanged.
        """
        if isinstance(self.filename, unicode):
            try:
                return self.filename.encode('ascii'), self.flag_bits
            except UnicodeEncodeError:
                return self.filename.encode('utf-8'), self.flag_bits | 0x800
        else:
            return self.filename, self.flag_bits

    def _decodeFilename(self):
        """Return the filename, decoded from UTF-8 when flag bit 0x800 is set."""
        if self.flag_bits & 0x800:
            return self.filename.decode('utf-8')
        else:
            return self.filename

    def _decodeExtra(self):
        """Apply the ZIP64 record (type 1) of the extra field, if present.

        Each of file_size, compress_size and header_offset that holds the
        0xffffffff placeholder is replaced, in that order, by the next 64-bit
        value from the record.  Raises RuntimeError for a ZIP64 record whose
        length is not one this code understands.
        """
        # Try to decode the extra field.
        extra = self.extra
        unpack = struct.unpack
        # Every record starts with a 4-byte (type, length) header; fewer than
        # four trailing bytes cannot be a record and are ignored.
        while len(extra) >= 4:
            tp, ln = unpack('<HH', extra[:4])
            if tp == 1:
                if ln >= 24:
                    counts = unpack('<QQQ', extra[4:28])
                elif ln == 16:
                    counts = unpack('<QQ', extra[4:20])
                elif ln == 8:
                    counts = unpack('<Q', extra[4:12])
                elif ln == 0:
                    counts = ()
                else:
                    raise RuntimeError("Corrupt extra field %s" % (ln,))

                idx = 0

                # ZIP64 extension (large files and/or large archives)
                if self.file_size in (0xffffffffffffffff, 0xffffffff):
                    self.file_size = counts[idx]
                    idx += 1

                if self.compress_size == 0xFFFFFFFF:
                    self.compress_size = counts[idx]
                    idx += 1

                if self.header_offset == 0xffffffff:
                    self.header_offset = counts[idx]
                    idx += 1

            extra = extra[ln+4:]


class _ZipDecrypter:
    """Class to handle decryption of files stored within a ZIP archive.

    ZIP supports a password-based form of encryption. Even though known
    plaintext attacks have been found against it, it is still useful
    to be able to get data out of such a file.

    Usage:
        zd = _ZipDecrypter(mypwd)
        plain_char = zd(cypher_char)
        plain_text = map(zd, cypher_text)
    """

    def _GenerateCRCTable():
        """Generate a CRC-32 table.

        ZIP encryption uses the CRC32 one-byte primitive for scrambling some
        internal keys. We noticed that a direct implementation is faster than
        relying on binascii.crc32().
        """
        poly = 0xedb88320
        table = [0] * 256
        for i in range(256):
            crc = i
            for j in range(8):
                if crc & 1:
                    crc = ((crc >> 1) & 0x7FFFFFFF) ^ poly
                else:
                    crc = ((crc >> 1) & 0x7FFFFFFF)
            table[i] = crc
        return table
    # Built once, while the class body executes, and shared by all instances.
    crctable = _GenerateCRCTable()

    def _crc32(self, ch, crc):
        """Compute the CRC32 primitive on one byte."""
        return ((crc >> 8) & 0xffffff) ^ self.crctable[(crc ^ ord(ch)) & 0xff]

    def __init__(self, pwd):
        """Set the three keys to their start values and mix in each
        character of *pwd*."""
        self.key0 = 0x12345678
        self.key1 = 0x23456789
        self.key2 = 0x34567890
        for p in pwd:
            self._UpdateKeys(p)

    def _UpdateKeys(self, c):
        """Advance the three keys with the one-character string *c*."""
        self.key0 = self._crc32(c, self.key0)
        # key1 is kept to 32 bits after each step.
        self.key1 = (self.key1 + (self.key0 & 0xff)) & 0xffffffff
        self.key1 = (self.key1 * 0x08088405 + 1) & 0xffffffff
        self.key2 = self._crc32(chr((self.key1 >> 24) & 0xff), self.key2)

    def __call__(self, c):
        """Decrypt a single character."""
        c = ord(c)
        k = self.key2 | 2
        c = c ^ (((k * (k^1)) >> 8) & 255)
        c = chr(c)
        # The keys are advanced with the decrypted character.
        self._UpdateKeys(c)
        return c

class ZipExtFile:
    """File-like object for reading an archive member.
       Is returned by ZipFile.open().

    Three buffers are used: rawbuffer holds bytes read from the archive but
    not yet decrypted/decompressed, readbuffer holds decoded bytes not yet
    returned by read(), and linebuffer holds bytes read() has returned but
    readline() has not yet handed out.
    """

    def __init__(self, fileobj, zipinfo, decrypt=None):
        """Wrap *fileobj*, which read() consumes from its current position.

        zipinfo supplies compress_type, compress_size and filename.  decrypt,
        when given, is a callable applied to each character read.
        """
        self.fileobj = fileobj
        self.decrypter = decrypt
        self.bytes_read = 0
        self.rawbuffer = ''
        self.readbuffer = ''
        self.linebuffer = ''
        # NOTE(review): nothing in this class sets eof to True, so the
        # decompressor flush in read() only runs if a caller sets it.
        self.eof = False
        self.univ_newlines = False
        self.nlSeps = ("\n", )
        self.lastdiscard = ''

        self.compress_type = zipinfo.compress_type
        self.compress_size = zipinfo.compress_size

        self.closed = False
        self.mode = "r"
        self.name = zipinfo.filename

        # read from compressed files in 64k blocks
        self.compreadsize = 64*1024
        if self.compress_type == ZIP_DEFLATED:
            self.dc = zlib.decompressobj(-15)

    def set_univ_newlines(self, univ_newlines):
        """Enable or disable recognition of "\\r\\n" and "\\r" as line ends."""
        self.univ_newlines = univ_newlines

        # pick line separator char(s) based on universal newlines flag
        self.nlSeps = ("\n", )
        if self.univ_newlines:
            self.nlSeps = ("\r\n", "\r", "\n")

    def __iter__(self):
        return self

    def next(self):
        """Return the next line; raise StopIteration at end of data."""
        nextline = self.readline()
        if not nextline:
            raise StopIteration()

        return nextline

    def close(self):
        """Mark this object closed; the underlying file object is left alone."""
        self.closed = True

    def _checkfornewline(self):
        """Locate the first line separator in linebuffer.

        Returns (index, separator length), or (-1, -1) when the buffer holds
        no separator.
        """
        nl, nllen = -1, -1
        if self.linebuffer:
            # ugly check for cases where half of an \r\n pair was
            # read on the last pass, and the \r was discarded.  In this
            # case we just throw away the \n at the start of the buffer.
            if (self.lastdiscard, self.linebuffer[0]) == ('\r','\n'):
                self.linebuffer = self.linebuffer[1:]
                # The pair is complete; a following "\n" is a real line end.
                self.lastdiscard = ''

            for sep in self.nlSeps:
                pos = self.linebuffer.find(sep)
                # Keep the earliest separator in the buffer.  On a tie the
                # one listed first wins, so "\r\n" beats a bare "\r".
                if pos >= 0 and (nl < 0 or pos < nl):
                    nl, nllen = pos, len(sep)

        return nl, nllen

    def readline(self, size = -1):
        """Read a line with approx. size. If size is negative,
           read a whole line.

        A complete line is returned with "\\n" as its terminator whichever
        separator was found.  When the limit is reached before a separator,
        the characters read so far are returned without a terminator.
        """
        if size < 0:
            size = sys.maxint
        elif size == 0:
            return ''

        # check for a newline already in buffer
        nl, nllen = self._checkfornewline()

        if nl >= 0:
            # the next line was already in the buffer
            if nl > size:
                # The limit falls before the separator: return exactly size
                # characters and keep the rest of the line buffered.
                s = self.linebuffer[:size]
                self.linebuffer = self.linebuffer[size:]
                self.lastdiscard = ''
                return s
        else:
            # no line break in buffer - try to read more
            size -= len(self.linebuffer)
            while nl < 0 and size > 0:
                buf = self.read(min(size, 100))
                if not buf:
                    break
                self.linebuffer += buf
                size -= len(buf)

                # check for a newline in buffer
                nl, nllen = self._checkfornewline()

            # we either ran out of bytes in the file, or
            # met the specified size limit without finding a newline,
            # so return current buffer
            if nl < 0:
                s = self.linebuffer
                self.linebuffer = ''
                if s:
                    # Ordinary characters were consumed, so the previously
                    # discarded separator no longer precedes the next read.
                    self.lastdiscard = ''
                return s

        buf = self.linebuffer[:nl]
        self.lastdiscard = self.linebuffer[nl:nl + nllen]
        self.linebuffer = self.linebuffer[nl + nllen:]

        # line is always returned with \n as newline char (except possibly
        # for a final incomplete line in the file, which is handled above).
        return buf + "\n"

    def readlines(self, sizehint = -1):
        """Return a list with all (following) lines. The sizehint parameter
        is ignored in this implementation.
        """
        result = []
        while True:
            line = self.readline()
            if not line: break
            result.append(line)
        return result

    def read(self, size = None):
        """Return up to *size* decoded bytes; everything buffered so far plus
        what this call reads when size is None."""
        # act like file() obj and return empty string if size is 0
        if size == 0:
            return ''

        # determine read size
        bytesToRead = self.compress_size - self.bytes_read

        # adjust read size for encrypted files since the first 12 bytes
        # are for the encryption/password information
        if self.decrypter is not None:
            bytesToRead -= 12

        if size is not None and size >= 0:
            if self.compress_type == ZIP_STORED:
                lr = len(self.readbuffer)
                bytesToRead = min(bytesToRead, size - lr)
            elif self.compress_type == ZIP_DEFLATED:
                if len(self.readbuffer) > size:
                    # the user has requested fewer bytes than we've already
                    # pulled through the decompressor; don't read any more
                    bytesToRead = 0
                else:
                    # user will use up the buffer, so read some more
                    lr = len(self.rawbuffer)
                    bytesToRead = min(bytesToRead, self.compreadsize - lr)

        # avoid reading past end of file contents
        if bytesToRead + self.bytes_read > self.compress_size:
            bytesToRead = self.compress_size - self.bytes_read

        # try to read from file (if necessary)
        if bytesToRead > 0:
            chunk = self.fileobj.read(bytesToRead)
            self.bytes_read += len(chunk)
            self.rawbuffer += chunk

            # handle contents of raw buffer
            if self.rawbuffer:
                newdata = self.rawbuffer
                self.rawbuffer = ''

                # decrypt new data if we were given an object to handle that
                if newdata and self.decrypter is not None:
                    newdata = ''.join(map(self.decrypter, newdata))

                # decompress newly read data if necessary
                if newdata and self.compress_type == ZIP_DEFLATED:
                    newdata = self.dc.decompress(newdata)
                    self.rawbuffer = self.dc.unconsumed_tail
                    if self.eof and len(self.rawbuffer) == 0:
                        # we're out of raw bytes (both from the file and
                        # the local buffer); flush just to make sure the
                        # decompressor is done
                        newdata += self.dc.flush()
                        # prevent decompressor from being used again
                        self.dc = None

                self.readbuffer += newdata

        # return what the user asked for
        if size is None or len(self.readbuffer) <= size:
            result = self.readbuffer
            self.readbuffer = ''
        else:
            result = self.readbuffer[:size]
            self.readbuffer = self.readbuffer[size:]

        return result


Guido van Rossum32abe6f2000-03-31 17:30:02 +0000645class ZipFile:
Tim Petersa19a1682001-03-29 04:36:09 +0000646 """ Class with methods to open, read, write, close, list zip files.
647
Martin v. Löwis8c436412008-07-03 12:51:14 +0000648 z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=False)
Tim Petersa19a1682001-03-29 04:36:09 +0000649
Fred Drake3d9091e2001-03-26 15:49:24 +0000650 file: Either the path to the file, or a file-like object.
651 If it is a path, the file will be opened and closed by ZipFile.
652 mode: The mode can be either read "r", write "w" or append "a".
653 compression: ZIP_STORED (no compression) or ZIP_DEFLATED (requires zlib).
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000654 allowZip64: if True ZipFile will create files with ZIP64 extensions when
655 needed, otherwise it will raise an exception when this would
656 be necessary.
657
Fred Drake3d9091e2001-03-26 15:49:24 +0000658 """
Fred Drake484d7352000-10-02 21:14:52 +0000659
Fred Drake90eac282001-02-28 05:29:34 +0000660 fp = None # Set here since __del__ checks it
661
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000662 def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=False):
Fred Drake484d7352000-10-02 21:14:52 +0000663 """Open the ZIP file with mode read "r", write "w" or append "a"."""
Georg Brandl4b3ab6f2007-07-12 09:59:22 +0000664 if mode not in ("r", "w", "a"):
665 raise RuntimeError('ZipFile() requires mode "r", "w", or "a"')
666
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000667 if compression == ZIP_STORED:
668 pass
669 elif compression == ZIP_DEFLATED:
670 if not zlib:
671 raise RuntimeError,\
Fred Drake5db246d2000-09-29 20:44:48 +0000672 "Compression requires the (missing) zlib module"
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000673 else:
674 raise RuntimeError, "That compression method is not supported"
Georg Brandl4b3ab6f2007-07-12 09:59:22 +0000675
676 self._allowZip64 = allowZip64
677 self._didModify = False
Tim Peterse1190062001-01-15 03:34:38 +0000678 self.debug = 0 # Level of printing: 0 through 3
679 self.NameToInfo = {} # Find file info given name
680 self.filelist = [] # List of ZipInfo instances for archive
681 self.compression = compression # Method of compression
Raymond Hettinger2ca7c192005-02-16 09:27:49 +0000682 self.mode = key = mode.replace('b', '')[0]
Martin v. Löwisc6d626e2007-02-13 09:49:38 +0000683 self.pwd = None
Martin v. Löwis8c436412008-07-03 12:51:14 +0000684 self.comment = ''
Tim Petersa19a1682001-03-29 04:36:09 +0000685
Fred Drake3d9091e2001-03-26 15:49:24 +0000686 # Check if we were passed a file-like object
Walter Dörwald65230a22002-06-03 15:58:32 +0000687 if isinstance(file, basestring):
Fred Drake3d9091e2001-03-26 15:49:24 +0000688 self._filePassed = 0
689 self.filename = file
690 modeDict = {'r' : 'rb', 'w': 'wb', 'a' : 'r+b'}
Martin v. Löwis84f6de92007-02-13 10:10:39 +0000691 try:
692 self.fp = open(file, modeDict[mode])
693 except IOError:
694 if mode == 'a':
695 mode = key = 'w'
696 self.fp = open(file, modeDict[mode])
697 else:
698 raise
Fred Drake3d9091e2001-03-26 15:49:24 +0000699 else:
700 self._filePassed = 1
701 self.fp = file
702 self.filename = getattr(file, 'name', None)
Tim Petersa19a1682001-03-29 04:36:09 +0000703
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000704 if key == 'r':
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000705 self._GetContents()
706 elif key == 'w':
Fred Drake3d9091e2001-03-26 15:49:24 +0000707 pass
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000708 elif key == 'a':
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000709 try: # See if file is a zip file
710 self._RealGetContents()
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000711 # seek to start of directory and overwrite
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000712 self.fp.seek(self.start_dir, 0)
713 except BadZipfile: # file is not a zip file, just append
714 self.fp.seek(0, 2)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000715 else:
Tim Peters7d3bad62001-04-04 18:56:49 +0000716 if not self._filePassed:
717 self.fp.close()
718 self.fp = None
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000719 raise RuntimeError, 'Mode must be "r", "w" or "a"'
720
721 def _GetContents(self):
Tim Peters7d3bad62001-04-04 18:56:49 +0000722 """Read the directory, making sure we close the file if the format
723 is bad."""
724 try:
725 self._RealGetContents()
726 except BadZipfile:
727 if not self._filePassed:
728 self.fp.close()
729 self.fp = None
730 raise
731
732 def _RealGetContents(self):
Fred Drake484d7352000-10-02 21:14:52 +0000733 """Read in the table of contents for the ZIP file."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000734 fp = self.fp
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000735 endrec = _EndRecData(fp)
736 if not endrec:
737 raise BadZipfile, "File is not a zip file"
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000738 if self.debug > 1:
739 print endrec
Martin v. Löwis8c436412008-07-03 12:51:14 +0000740 size_cd = endrec[_ECD_SIZE] # bytes in central directory
741 offset_cd = endrec[_ECD_OFFSET] # offset of central directory
742 self.comment = endrec[_ECD_COMMENT] # archive comment
743
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000744 # "concat" is zero, unless zip was concatenated to another file
Martin v. Löwis8c436412008-07-03 12:51:14 +0000745 concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
Antoine Pitrouebcd0ce2008-09-05 23:30:23 +0000746 if endrec[_ECD_SIGNATURE] == stringEndArchive64:
747 # If Zip64 extension structures are present, account for them
Martin v. Löwis8c436412008-07-03 12:51:14 +0000748 concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)
749
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000750 if self.debug > 2:
Martin v. Löwis8c436412008-07-03 12:51:14 +0000751 inferred = concat + offset_cd
752 print "given, inferred, offset", offset_cd, inferred, concat
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000753 # self.start_dir: Position of start of central directory
754 self.start_dir = offset_cd + concat
755 fp.seek(self.start_dir, 0)
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000756 data = fp.read(size_cd)
757 fp = cStringIO.StringIO(data)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000758 total = 0
759 while total < size_cd:
Martin v. Löwis8c436412008-07-03 12:51:14 +0000760 centdir = fp.read(sizeCentralDir)
Amaury Forgeot d'Arcae6d2b92008-07-11 21:28:25 +0000761 if centdir[0:4] != stringCentralDir:
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000762 raise BadZipfile, "Bad magic number for central directory"
763 centdir = struct.unpack(structCentralDir, centdir)
764 if self.debug > 2:
765 print centdir
Fred Drake3e038e52001-02-28 17:56:26 +0000766 filename = fp.read(centdir[_CD_FILENAME_LENGTH])
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000767 # Create ZipInfo instance to store file information
768 x = ZipInfo(filename)
Fred Drake3e038e52001-02-28 17:56:26 +0000769 x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
770 x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000771 x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000772 (x.create_version, x.create_system, x.extract_version, x.reserved,
773 x.flag_bits, x.compress_type, t, d,
774 x.CRC, x.compress_size, x.file_size) = centdir[1:12]
775 x.volume, x.internal_attr, x.external_attr = centdir[15:18]
776 # Convert date/time code to (year, month, day, hour, min, sec)
Gregory P. Smith0c63fc22008-01-20 01:21:03 +0000777 x._raw_time = t
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000778 x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
Fred Drake414ca662000-06-13 18:49:53 +0000779 t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000780
781 x._decodeExtra()
782 x.header_offset = x.header_offset + concat
Martin v. Löwis471617d2008-05-05 17:16:58 +0000783 x.filename = x._decodeFilename()
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000784 self.filelist.append(x)
785 self.NameToInfo[x.filename] = x
Martin v. Löwis8c436412008-07-03 12:51:14 +0000786
787 # update total bytes read from central directory
788 total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH]
789 + centdir[_CD_EXTRA_FIELD_LENGTH]
790 + centdir[_CD_COMMENT_LENGTH])
791
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000792 if self.debug > 2:
793 print "total", total
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000794
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000795
796 def namelist(self):
Fred Drake484d7352000-10-02 21:14:52 +0000797 """Return a list of file names in the archive."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000798 l = []
799 for data in self.filelist:
800 l.append(data.filename)
801 return l
802
803 def infolist(self):
Fred Drake484d7352000-10-02 21:14:52 +0000804 """Return a list of class ZipInfo instances for files in the
805 archive."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000806 return self.filelist
807
808 def printdir(self):
Fred Drake484d7352000-10-02 21:14:52 +0000809 """Print a table of contents for the zip file."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000810 print "%-46s %19s %12s" % ("File Name", "Modified ", "Size")
811 for zinfo in self.filelist:
Raymond Hettinger351e1a32008-01-14 22:58:05 +0000812 date = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time[:6]
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000813 print "%-46s %s %12d" % (zinfo.filename, date, zinfo.file_size)
814
815 def testzip(self):
Fred Drake484d7352000-10-02 21:14:52 +0000816 """Read all the files and check the CRC."""
Antoine Pitrouc5342702008-08-17 13:06:29 +0000817 chunk_size = 2 ** 20
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000818 for zinfo in self.filelist:
819 try:
Antoine Pitrouc5342702008-08-17 13:06:29 +0000820 # Read by chunks, to avoid an OverflowError or a
821 # MemoryError with very large embedded files.
822 f = self.open(zinfo.filename, "r")
823 while f.read(chunk_size): # Check CRC-32
824 pass
Raymond Hettingerc0fac962003-06-27 22:25:03 +0000825 except BadZipfile:
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000826 return zinfo.filename
827
828 def getinfo(self, name):
Fred Drake484d7352000-10-02 21:14:52 +0000829 """Return the instance of ZipInfo given 'name'."""
Georg Brandl4b3ab6f2007-07-12 09:59:22 +0000830 info = self.NameToInfo.get(name)
831 if info is None:
832 raise KeyError(
833 'There is no item named %r in the archive' % name)
834
835 return info
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000836
Martin v. Löwisc6d626e2007-02-13 09:49:38 +0000837 def setpassword(self, pwd):
838 """Set default password for encrypted files."""
839 self.pwd = pwd
840
841 def read(self, name, pwd=None):
Fred Drake484d7352000-10-02 21:14:52 +0000842 """Return file bytes (as a string) for name."""
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000843 return self.open(name, "r", pwd).read()
844
845 def open(self, name, mode="r", pwd=None):
846 """Return file-like object for 'name'."""
847 if mode not in ("r", "U", "rU"):
848 raise RuntimeError, 'open() requires mode "r", "U", or "rU"'
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000849 if not self.fp:
850 raise RuntimeError, \
Fred Drake5db246d2000-09-29 20:44:48 +0000851 "Attempt to read ZIP archive that was already closed"
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000852
Tim Petersea5962f2007-03-12 18:07:52 +0000853 # Only open a new file for instances where we were not
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000854 # given a file object in the constructor
855 if self._filePassed:
856 zef_file = self.fp
857 else:
858 zef_file = open(self.filename, 'rb')
859
Georg Brandl112aa502008-05-20 08:25:48 +0000860 # Make sure we have an info object
861 if isinstance(name, ZipInfo):
862 # 'name' is already an info object
863 zinfo = name
864 else:
865 # Get info object for name
866 zinfo = self.getinfo(name)
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000867
868 zef_file.seek(zinfo.header_offset, 0)
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000869
870 # Skip the file header:
Martin v. Löwis8c436412008-07-03 12:51:14 +0000871 fheader = zef_file.read(sizeFileHeader)
Amaury Forgeot d'Arcae6d2b92008-07-11 21:28:25 +0000872 if fheader[0:4] != stringFileHeader:
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000873 raise BadZipfile, "Bad magic number for file header"
874
875 fheader = struct.unpack(structFileHeader, fheader)
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000876 fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000877 if fheader[_FH_EXTRA_FIELD_LENGTH]:
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000878 zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000879
880 if fname != zinfo.orig_filename:
881 raise BadZipfile, \
882 'File name in directory "%s" and header "%s" differ.' % (
883 zinfo.orig_filename, fname)
884
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000885 # check for encrypted flag & handle password
886 is_encrypted = zinfo.flag_bits & 0x1
887 zd = None
Martin v. Löwisc6d626e2007-02-13 09:49:38 +0000888 if is_encrypted:
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000889 if not pwd:
890 pwd = self.pwd
891 if not pwd:
892 raise RuntimeError, "File %s is encrypted, " \
893 "password required for extraction" % name
894
Martin v. Löwisc6d626e2007-02-13 09:49:38 +0000895 zd = _ZipDecrypter(pwd)
896 # The first 12 bytes in the cypher stream is an encryption header
897 # used to strengthen the algorithm. The first 11 bytes are
898 # completely random, while the 12th contains the MSB of the CRC,
Gregory P. Smith0c63fc22008-01-20 01:21:03 +0000899 # or the MSB of the file time depending on the header type
Martin v. Löwisc6d626e2007-02-13 09:49:38 +0000900 # and is used to check the correctness of the password.
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000901 bytes = zef_file.read(12)
Martin v. Löwisc6d626e2007-02-13 09:49:38 +0000902 h = map(zd, bytes[0:12])
Gregory P. Smith0c63fc22008-01-20 01:21:03 +0000903 if zinfo.flag_bits & 0x8:
904 # compare against the file type from extended local headers
905 check_byte = (zinfo._raw_time >> 8) & 0xff
906 else:
907 # compare against the CRC otherwise
908 check_byte = (zinfo.CRC >> 24) & 0xff
909 if ord(h[11]) != check_byte:
910 raise RuntimeError("Bad password for file", name)
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000911
912 # build and return a ZipExtFile
913 if zd is None:
914 zef = ZipExtFile(zef_file, zinfo)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000915 else:
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000916 zef = ZipExtFile(zef_file, zinfo, zd)
917
918 # set universal newlines on ZipExtFile if necessary
919 if "U" in mode:
920 zef.set_univ_newlines(True)
921 return zef
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000922
Georg Brandl62416bc2008-01-07 18:47:44 +0000923 def extract(self, member, path=None, pwd=None):
924 """Extract a member from the archive to the current working directory,
925 using its full name. Its file information is extracted as accurately
926 as possible. `member' may be a filename or a ZipInfo object. You can
927 specify a different directory using `path'.
928 """
929 if not isinstance(member, ZipInfo):
930 member = self.getinfo(member)
931
932 if path is None:
933 path = os.getcwd()
934
935 return self._extract_member(member, path, pwd)
936
937 def extractall(self, path=None, members=None, pwd=None):
938 """Extract all members from the archive to the current working
939 directory. `path' specifies a different directory to extract to.
940 `members' is optional and must be a subset of the list returned
941 by namelist().
942 """
943 if members is None:
944 members = self.namelist()
945
946 for zipinfo in members:
947 self.extract(zipinfo, path, pwd)
948
949 def _extract_member(self, member, targetpath, pwd):
950 """Extract the ZipInfo object 'member' to a physical
951 file on the path targetpath.
952 """
953 # build the destination pathname, replacing
954 # forward slashes to platform specific separators.
Antoine Pitrou97377bf2009-05-04 21:17:17 +0000955 # Strip trailing path separator, unless it represents the root.
956 if (targetpath[-1:] in (os.path.sep, os.path.altsep)
957 and len(os.path.splitdrive(targetpath)[1]) > 1):
Georg Brandl62416bc2008-01-07 18:47:44 +0000958 targetpath = targetpath[:-1]
959
960 # don't include leading "/" from file name if present
Martin v. Löwis0dfcfc82009-01-24 14:00:33 +0000961 if member.filename[0] == '/':
Georg Brandl62416bc2008-01-07 18:47:44 +0000962 targetpath = os.path.join(targetpath, member.filename[1:])
963 else:
964 targetpath = os.path.join(targetpath, member.filename)
965
966 targetpath = os.path.normpath(targetpath)
967
968 # Create all upper directories if necessary.
969 upperdirs = os.path.dirname(targetpath)
970 if upperdirs and not os.path.exists(upperdirs):
971 os.makedirs(upperdirs)
972
Martin v. Löwis0dfcfc82009-01-24 14:00:33 +0000973 if member.filename[-1] == '/':
Martin v. Löwis0b09c422009-05-24 19:30:52 +0000974 if not os.path.isdir(targetpath):
975 os.mkdir(targetpath)
Martin v. Löwis0dfcfc82009-01-24 14:00:33 +0000976 return targetpath
977
Georg Brandl112aa502008-05-20 08:25:48 +0000978 source = self.open(member, pwd=pwd)
Georg Brandl62416bc2008-01-07 18:47:44 +0000979 target = file(targetpath, "wb")
980 shutil.copyfileobj(source, target)
981 source.close()
982 target.close()
983
984 return targetpath
985
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000986 def _writecheck(self, zinfo):
Fred Drake484d7352000-10-02 21:14:52 +0000987 """Check for errors before writing a file to the archive."""
Raymond Hettinger54f02222002-06-01 14:18:47 +0000988 if zinfo.filename in self.NameToInfo:
Tim Peterse1190062001-01-15 03:34:38 +0000989 if self.debug: # Warning for duplicate names
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000990 print "Duplicate name:", zinfo.filename
991 if self.mode not in ("w", "a"):
992 raise RuntimeError, 'write() requires mode "w" or "a"'
993 if not self.fp:
994 raise RuntimeError, \
Fred Drake5db246d2000-09-29 20:44:48 +0000995 "Attempt to write ZIP archive that was already closed"
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000996 if zinfo.compress_type == ZIP_DEFLATED and not zlib:
997 raise RuntimeError, \
Fred Drake5db246d2000-09-29 20:44:48 +0000998 "Compression requires the (missing) zlib module"
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000999 if zinfo.compress_type not in (ZIP_STORED, ZIP_DEFLATED):
1000 raise RuntimeError, \
Fred Drake5db246d2000-09-29 20:44:48 +00001001 "That compression method is not supported"
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001002 if zinfo.file_size > ZIP64_LIMIT:
1003 if not self._allowZip64:
1004 raise LargeZipFile("Filesize would require ZIP64 extensions")
1005 if zinfo.header_offset > ZIP64_LIMIT:
1006 if not self._allowZip64:
1007 raise LargeZipFile("Zipfile size would require ZIP64 extensions")
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001008
1009 def write(self, filename, arcname=None, compress_type=None):
Fred Drake484d7352000-10-02 21:14:52 +00001010 """Put the bytes from filename into the archive under the name
1011 arcname."""
Georg Brandl4b3ab6f2007-07-12 09:59:22 +00001012 if not self.fp:
1013 raise RuntimeError(
1014 "Attempt to write to ZIP archive that was already closed")
1015
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001016 st = os.stat(filename)
Martin v. Löwis0dfcfc82009-01-24 14:00:33 +00001017 isdir = stat.S_ISDIR(st.st_mode)
Raymond Hettinger32200ae2002-06-01 19:51:15 +00001018 mtime = time.localtime(st.st_mtime)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001019 date_time = mtime[0:6]
1020 # Create ZipInfo instance to store file information
1021 if arcname is None:
Georg Brandl8f7c54e2006-02-20 08:40:38 +00001022 arcname = filename
1023 arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
1024 while arcname[0] in (os.sep, os.altsep):
1025 arcname = arcname[1:]
Martin v. Löwis0dfcfc82009-01-24 14:00:33 +00001026 if isdir:
1027 arcname += '/'
Georg Brandl8f7c54e2006-02-20 08:40:38 +00001028 zinfo = ZipInfo(arcname, date_time)
Andrew M. Kuchling55430212004-07-10 15:40:29 +00001029 zinfo.external_attr = (st[0] & 0xFFFF) << 16L # Unix attributes
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001030 if compress_type is None:
Tim Peterse1190062001-01-15 03:34:38 +00001031 zinfo.compress_type = self.compression
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001032 else:
Tim Peterse1190062001-01-15 03:34:38 +00001033 zinfo.compress_type = compress_type
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001034
1035 zinfo.file_size = st.st_size
Finn Bock03a3bb82001-09-05 18:40:33 +00001036 zinfo.flag_bits = 0x00
Tim Peterse1190062001-01-15 03:34:38 +00001037 zinfo.header_offset = self.fp.tell() # Start of header bytes
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001038
1039 self._writecheck(zinfo)
1040 self._didModify = True
Martin v. Löwis0dfcfc82009-01-24 14:00:33 +00001041
1042 if isdir:
1043 zinfo.file_size = 0
1044 zinfo.compress_size = 0
1045 zinfo.CRC = 0
1046 self.filelist.append(zinfo)
1047 self.NameToInfo[zinfo.filename] = zinfo
1048 self.fp.write(zinfo.FileHeader())
1049 return
1050
Benjamin Petersonb91e8ed2009-05-10 02:29:00 +00001051 with open(filename, "rb") as fp:
1052 # Must overwrite CRC and sizes with correct data later
1053 zinfo.CRC = CRC = 0
1054 zinfo.compress_size = compress_size = 0
1055 zinfo.file_size = file_size = 0
1056 self.fp.write(zinfo.FileHeader())
1057 if zinfo.compress_type == ZIP_DEFLATED:
1058 cmpr = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
1059 zlib.DEFLATED, -15)
1060 else:
1061 cmpr = None
1062 while 1:
1063 buf = fp.read(1024 * 8)
1064 if not buf:
1065 break
1066 file_size = file_size + len(buf)
1067 CRC = crc32(buf, CRC) & 0xffffffff
1068 if cmpr:
1069 buf = cmpr.compress(buf)
1070 compress_size = compress_size + len(buf)
1071 self.fp.write(buf)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001072 if cmpr:
1073 buf = cmpr.flush()
1074 compress_size = compress_size + len(buf)
1075 self.fp.write(buf)
1076 zinfo.compress_size = compress_size
1077 else:
1078 zinfo.compress_size = file_size
1079 zinfo.CRC = CRC
1080 zinfo.file_size = file_size
Finn Bock03a3bb82001-09-05 18:40:33 +00001081 # Seek backwards and write CRC and file sizes
Tim Petersb64bec32001-09-18 02:26:39 +00001082 position = self.fp.tell() # Preserve current position in file
Finn Bock03a3bb82001-09-05 18:40:33 +00001083 self.fp.seek(zinfo.header_offset + 14, 0)
Gregory P. Smithbf02e3b2008-03-19 03:14:41 +00001084 self.fp.write(struct.pack("<LLL", zinfo.CRC, zinfo.compress_size,
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001085 zinfo.file_size))
Finn Bock03a3bb82001-09-05 18:40:33 +00001086 self.fp.seek(position, 0)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001087 self.filelist.append(zinfo)
1088 self.NameToInfo[zinfo.filename] = zinfo
1089
Just van Rossumb083cb32002-12-12 12:23:32 +00001090 def writestr(self, zinfo_or_arcname, bytes):
Fred Drake484d7352000-10-02 21:14:52 +00001091 """Write a file into the archive. The contents is the string
Just van Rossumb083cb32002-12-12 12:23:32 +00001092 'bytes'. 'zinfo_or_arcname' is either a ZipInfo instance or
1093 the name of the file in the archive."""
1094 if not isinstance(zinfo_or_arcname, ZipInfo):
1095 zinfo = ZipInfo(filename=zinfo_or_arcname,
Raymond Hettinger351e1a32008-01-14 22:58:05 +00001096 date_time=time.localtime(time.time())[:6])
Just van Rossumb083cb32002-12-12 12:23:32 +00001097 zinfo.compress_type = self.compression
Antoine Pitrou5fdfa3e2008-07-25 19:42:26 +00001098 zinfo.external_attr = 0600 << 16
Just van Rossumb083cb32002-12-12 12:23:32 +00001099 else:
1100 zinfo = zinfo_or_arcname
Georg Brandl4b3ab6f2007-07-12 09:59:22 +00001101
1102 if not self.fp:
1103 raise RuntimeError(
1104 "Attempt to write to ZIP archive that was already closed")
1105
Tim Peterse1190062001-01-15 03:34:38 +00001106 zinfo.file_size = len(bytes) # Uncompressed size
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001107 zinfo.header_offset = self.fp.tell() # Start of header bytes
1108 self._writecheck(zinfo)
1109 self._didModify = True
Gregory P. Smithbf02e3b2008-03-19 03:14:41 +00001110 zinfo.CRC = crc32(bytes) & 0xffffffff # CRC-32 checksum
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001111 if zinfo.compress_type == ZIP_DEFLATED:
1112 co = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
1113 zlib.DEFLATED, -15)
1114 bytes = co.compress(bytes) + co.flush()
Tim Peterse1190062001-01-15 03:34:38 +00001115 zinfo.compress_size = len(bytes) # Compressed size
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001116 else:
1117 zinfo.compress_size = zinfo.file_size
Tim Peterse1190062001-01-15 03:34:38 +00001118 zinfo.header_offset = self.fp.tell() # Start of header bytes
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001119 self.fp.write(zinfo.FileHeader())
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001120 self.fp.write(bytes)
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001121 self.fp.flush()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001122 if zinfo.flag_bits & 0x08:
Tim Peterse1190062001-01-15 03:34:38 +00001123 # Write CRC and file sizes after the file data
Gregory P. Smith26627332009-06-26 07:50:21 +00001124 self.fp.write(struct.pack("<LLL", zinfo.CRC, zinfo.compress_size,
Tim Peterse1190062001-01-15 03:34:38 +00001125 zinfo.file_size))
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001126 self.filelist.append(zinfo)
1127 self.NameToInfo[zinfo.filename] = zinfo
1128
1129 def __del__(self):
Fred Drake484d7352000-10-02 21:14:52 +00001130 """Call the "close()" method in case the user forgot."""
Tim Petersd15f8bb2001-11-28 23:16:40 +00001131 self.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001132
1133 def close(self):
Fred Drake484d7352000-10-02 21:14:52 +00001134 """Close the file, and for mode "w" and "a" write the ending
1135 records."""
Tim Petersd15f8bb2001-11-28 23:16:40 +00001136 if self.fp is None:
1137 return
Tim Petersa608bb22006-06-15 18:06:29 +00001138
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001139 if self.mode in ("w", "a") and self._didModify: # write ending records
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001140 count = 0
1141 pos1 = self.fp.tell()
Tim Peterse1190062001-01-15 03:34:38 +00001142 for zinfo in self.filelist: # write central directory
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001143 count = count + 1
1144 dt = zinfo.date_time
1145 dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
Tim Peters3caca232001-12-06 06:23:26 +00001146 dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001147 extra = []
1148 if zinfo.file_size > ZIP64_LIMIT \
1149 or zinfo.compress_size > ZIP64_LIMIT:
1150 extra.append(zinfo.file_size)
1151 extra.append(zinfo.compress_size)
Martin v. Löwis8c436412008-07-03 12:51:14 +00001152 file_size = 0xffffffff
1153 compress_size = 0xffffffff
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001154 else:
1155 file_size = zinfo.file_size
1156 compress_size = zinfo.compress_size
1157
1158 if zinfo.header_offset > ZIP64_LIMIT:
1159 extra.append(zinfo.header_offset)
Martin v. Löwis8c436412008-07-03 12:51:14 +00001160 header_offset = 0xffffffffL
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001161 else:
1162 header_offset = zinfo.header_offset
1163
1164 extra_data = zinfo.extra
1165 if extra:
1166 # Append a ZIP64 field to the extra's
1167 extra_data = struct.pack(
Gregory P. Smithb89a0962008-03-19 01:46:10 +00001168 '<HH' + 'Q'*len(extra),
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001169 1, 8*len(extra), *extra) + extra_data
Tim Petersa608bb22006-06-15 18:06:29 +00001170
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001171 extract_version = max(45, zinfo.extract_version)
1172 create_version = max(45, zinfo.create_version)
1173 else:
1174 extract_version = zinfo.extract_version
1175 create_version = zinfo.create_version
1176
Gregory P. Smithbf02e3b2008-03-19 03:14:41 +00001177 try:
Martin v. Löwis471617d2008-05-05 17:16:58 +00001178 filename, flag_bits = zinfo._encodeFilenameFlags()
Gregory P. Smithbf02e3b2008-03-19 03:14:41 +00001179 centdir = struct.pack(structCentralDir,
Amaury Forgeot d'Arcae6d2b92008-07-11 21:28:25 +00001180 stringCentralDir, create_version,
Gregory P. Smithbf02e3b2008-03-19 03:14:41 +00001181 zinfo.create_system, extract_version, zinfo.reserved,
Martin v. Löwis471617d2008-05-05 17:16:58 +00001182 flag_bits, zinfo.compress_type, dostime, dosdate,
Gregory P. Smithbf02e3b2008-03-19 03:14:41 +00001183 zinfo.CRC, compress_size, file_size,
Martin v. Löwis471617d2008-05-05 17:16:58 +00001184 len(filename), len(extra_data), len(zinfo.comment),
Gregory P. Smithbf02e3b2008-03-19 03:14:41 +00001185 0, zinfo.internal_attr, zinfo.external_attr,
1186 header_offset)
1187 except DeprecationWarning:
1188 print >>sys.stderr, (structCentralDir,
1189 stringCentralDir, create_version,
1190 zinfo.create_system, extract_version, zinfo.reserved,
1191 zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
1192 zinfo.CRC, compress_size, file_size,
1193 len(zinfo.filename), len(extra_data), len(zinfo.comment),
1194 0, zinfo.internal_attr, zinfo.external_attr,
1195 header_offset)
1196 raise
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001197 self.fp.write(centdir)
Martin v. Löwis471617d2008-05-05 17:16:58 +00001198 self.fp.write(filename)
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001199 self.fp.write(extra_data)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001200 self.fp.write(zinfo.comment)
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001201
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001202 pos2 = self.fp.tell()
1203 # Write end-of-zip-archive record
Amaury Forgeot d'Arcd25f87a2009-01-17 16:40:17 +00001204 centDirCount = count
1205 centDirSize = pos2 - pos1
Martin v. Löwis8c436412008-07-03 12:51:14 +00001206 centDirOffset = pos1
Amaury Forgeot d'Arcd25f87a2009-01-17 16:40:17 +00001207 if (centDirCount >= ZIP_FILECOUNT_LIMIT or
1208 centDirOffset > ZIP64_LIMIT or
1209 centDirSize > ZIP64_LIMIT):
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001210 # Need to write the ZIP64 end-of-archive records
1211 zip64endrec = struct.pack(
Amaury Forgeot d'Arcae6d2b92008-07-11 21:28:25 +00001212 structEndArchive64, stringEndArchive64,
Amaury Forgeot d'Arcd25f87a2009-01-17 16:40:17 +00001213 44, 45, 45, 0, 0, centDirCount, centDirCount,
1214 centDirSize, centDirOffset)
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001215 self.fp.write(zip64endrec)
1216
1217 zip64locrec = struct.pack(
Amaury Forgeot d'Arcae6d2b92008-07-11 21:28:25 +00001218 structEndArchive64Locator,
1219 stringEndArchive64Locator, 0, pos2, 1)
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001220 self.fp.write(zip64locrec)
Amaury Forgeot d'Arcd25f87a2009-01-17 16:40:17 +00001221 centDirCount = min(centDirCount, 0xFFFF)
1222 centDirSize = min(centDirSize, 0xFFFFFFFF)
1223 centDirOffset = min(centDirOffset, 0xFFFFFFFF)
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001224
Martin v. Löwis8c436412008-07-03 12:51:14 +00001225 # check for valid comment length
1226 if len(self.comment) >= ZIP_MAX_COMMENT:
1227 if self.debug > 0:
1228 msg = 'Archive comment is too long; truncating to %d bytes' \
1229 % ZIP_MAX_COMMENT
1230 self.comment = self.comment[:ZIP_MAX_COMMENT]
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001231
Amaury Forgeot d'Arcae6d2b92008-07-11 21:28:25 +00001232 endrec = struct.pack(structEndArchive, stringEndArchive,
Amaury Forgeot d'Arcd25f87a2009-01-17 16:40:17 +00001233 0, 0, centDirCount, centDirCount,
1234 centDirSize, centDirOffset, len(self.comment))
Martin v. Löwis8c436412008-07-03 12:51:14 +00001235 self.fp.write(endrec)
1236 self.fp.write(self.comment)
Guido van Rossumf85af612001-04-14 16:45:14 +00001237 self.fp.flush()
Martin v. Löwis8c436412008-07-03 12:51:14 +00001238
Fred Drake3d9091e2001-03-26 15:49:24 +00001239 if not self._filePassed:
1240 self.fp.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001241 self.fp = None
1242
1243
1244class PyZipFile(ZipFile):
Fred Drake484d7352000-10-02 21:14:52 +00001245 """Class to create ZIP archives with Python library files and packages."""
1246
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001247 def writepy(self, pathname, basename = ""):
1248 """Add all files from "pathname" to the ZIP archive.
1249
Fred Drake484d7352000-10-02 21:14:52 +00001250 If pathname is a package directory, search the directory and
1251 all package subdirectories recursively for all *.py and enter
1252 the modules into the archive. If pathname is a plain
1253 directory, listdir *.py and enter all modules. Else, pathname
1254 must be a Python *.py file and the module will be put into the
1255 archive. Added modules are always module.pyo or module.pyc.
1256 This method will compile the module.py into module.pyc if
1257 necessary.
1258 """
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001259 dir, name = os.path.split(pathname)
1260 if os.path.isdir(pathname):
1261 initname = os.path.join(pathname, "__init__.py")
1262 if os.path.isfile(initname):
1263 # This is a package directory, add it
1264 if basename:
1265 basename = "%s/%s" % (basename, name)
1266 else:
1267 basename = name
1268 if self.debug:
1269 print "Adding package in", pathname, "as", basename
1270 fname, arcname = self._get_codename(initname[0:-3], basename)
1271 if self.debug:
1272 print "Adding", arcname
1273 self.write(fname, arcname)
1274 dirlist = os.listdir(pathname)
1275 dirlist.remove("__init__.py")
1276 # Add all *.py files and package subdirectories
1277 for filename in dirlist:
1278 path = os.path.join(pathname, filename)
1279 root, ext = os.path.splitext(filename)
1280 if os.path.isdir(path):
1281 if os.path.isfile(os.path.join(path, "__init__.py")):
1282 # This is a package directory, add it
1283 self.writepy(path, basename) # Recursive call
1284 elif ext == ".py":
1285 fname, arcname = self._get_codename(path[0:-3],
1286 basename)
1287 if self.debug:
1288 print "Adding", arcname
1289 self.write(fname, arcname)
1290 else:
1291 # This is NOT a package directory, add its files at top level
1292 if self.debug:
1293 print "Adding files from directory", pathname
1294 for filename in os.listdir(pathname):
1295 path = os.path.join(pathname, filename)
1296 root, ext = os.path.splitext(filename)
1297 if ext == ".py":
1298 fname, arcname = self._get_codename(path[0:-3],
1299 basename)
1300 if self.debug:
1301 print "Adding", arcname
1302 self.write(fname, arcname)
1303 else:
1304 if pathname[-3:] != ".py":
1305 raise RuntimeError, \
Fred Drake5db246d2000-09-29 20:44:48 +00001306 'Files added with writepy() must end with ".py"'
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001307 fname, arcname = self._get_codename(pathname[0:-3], basename)
1308 if self.debug:
1309 print "Adding file", arcname
1310 self.write(fname, arcname)
1311
def _get_codename(self, pathname, basename):
    """Return (filename, archivename) for the path.

    Given a module name path (a path without the ".py" suffix), return
    the compiled file to store and the name to store it under,
    compiling the source if necessary.  For example, given
    /python/lib/string, return (/python/lib/string.pyc, string.pyc);
    with a non-empty basename such as "pkg" the archive name becomes
    pkg/string.pyc.

    An up-to-date .pyo file is preferred over the .pyc file.  A missing
    or stale .pyc is rebuilt with py_compile; if compilation fails the
    error message is printed and the .pyc path is still returned.
    """
    file_py = pathname + ".py"
    file_pyc = pathname + ".pyc"
    file_pyo = pathname + ".pyo"

    def is_current(compiled):
        # True when the compiled file exists and is at least as new as
        # the source.  The source is only stat'ed once the compiled file
        # is known to exist, so a missing compiled file never triggers
        # an error about the source.
        return (os.path.isfile(compiled) and
                os.stat(compiled).st_mtime >= os.stat(file_py).st_mtime)

    if is_current(file_pyo):
        fname = file_pyo    # Use .pyo file
    else:
        if not is_current(file_pyc):
            import py_compile
            if self.debug:
                print("Compiling %s" % file_py)
            try:
                # doraise=True so syntax errors surface as PyCompileError
                # instead of being written to stderr by py_compile.
                py_compile.compile(file_py, file_pyc, None, True)
            except py_compile.PyCompileError as err:
                print(err.msg)
        fname = file_pyc
    archivename = os.path.split(fname)[1]
    if basename:
        archivename = "%s/%s" % (basename, archivename)
    return (fname, archivename)
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001341
1342
def main(args=None):
    """Command-line interface: list, test, extract or create a zipfile.

    args -- list of command-line arguments without the program name;
            when None, sys.argv[1:] is used.

    Supported forms are shown in the usage text below.  On a missing or
    unknown option, or a wrong number of arguments, the usage text is
    printed and the process exits with status 1.  The archive is always
    closed, even when an operation raises.
    """
    import textwrap
    USAGE = textwrap.dedent("""\
        Usage:
            zipfile.py -l zipfile.zip        # Show listing of a zipfile
            zipfile.py -t zipfile.zip        # Test if a zipfile is valid
            zipfile.py -e zipfile.zip target # Extract zipfile into target dir
            zipfile.py -c zipfile.zip src ... # Create zipfile from sources
        """)
    if args is None:
        args = sys.argv[1:]

    if not args or args[0] not in ('-l', '-c', '-e', '-t'):
        print(USAGE)
        sys.exit(1)

    if args[0] == '-l':
        if len(args) != 2:
            print(USAGE)
            sys.exit(1)
        zf = ZipFile(args[1], 'r')
        try:
            zf.printdir()
        finally:
            zf.close()

    elif args[0] == '-t':
        if len(args) != 2:
            print(USAGE)
            sys.exit(1)
        zf = ZipFile(args[1], 'r')
        try:
            # testzip() reports the first corrupt member, or None when
            # every member checks out.
            badfile = zf.testzip()
        finally:
            zf.close()
        if badfile:
            print("The following enclosed file is corrupted: %r" % (badfile,))
        print("Done testing")

    elif args[0] == '-e':
        if len(args) != 3:
            print(USAGE)
            sys.exit(1)

        zf = ZipFile(args[1], 'r')
        try:
            out = args[2]
            root = os.path.abspath(out)
            # root with a trailing separator, so "/a/bc" is not mistaken
            # for something inside "/a/b".
            root_prefix = os.path.join(root, '')
            for path in zf.namelist():
                if path.startswith('./'):
                    tgt = os.path.join(out, path[2:])
                else:
                    tgt = os.path.join(out, path)

                # Member names come from the archive and are untrusted:
                # refuse anything ("../x", absolute paths) that would be
                # written outside the target directory.
                dest = os.path.abspath(tgt)
                if dest != root and not dest.startswith(root_prefix):
                    sys.stderr.write(
                        "Skipping member outside target dir: %r\n" % (path,))
                    continue

                if path.endswith('/'):
                    # Directory entry: there is no file data to write.
                    if not os.path.isdir(tgt):
                        os.makedirs(tgt)
                    continue

                tgtdir = os.path.dirname(tgt)
                # tgtdir is '' when extracting into the current directory
                # by an empty target; os.makedirs('') would raise.
                if tgtdir and not os.path.exists(tgtdir):
                    os.makedirs(tgtdir)
                with open(tgt, 'wb') as fp:
                    fp.write(zf.read(path))
        finally:
            zf.close()

    elif args[0] == '-c':
        if len(args) < 3:
            print(USAGE)
            sys.exit(1)

        def addToZip(zf, path, zippath):
            # Add a file, or recursively the contents of a directory,
            # storing it under zippath inside the archive.
            if os.path.isfile(path):
                zf.write(path, zippath, ZIP_DEFLATED)
            elif os.path.isdir(path):
                for nm in os.listdir(path):
                    addToZip(zf,
                             os.path.join(path, nm), os.path.join(zippath, nm))
            # else: ignore

        zf = ZipFile(args[1], 'w', allowZip64=True)
        try:
            for src in args[2:]:
                addToZip(zf, src, os.path.basename(src))
        finally:
            zf.close()
1414
# Script entry point: run the command-line interface only when this file
# is executed directly, not when it is imported as a module.
if __name__ == "__main__":
    main()