blob: fe012968b373f7cd8ee5748ee0e5f6efe8e5b9b3 [file] [log] [blame]
"""
Read and write ZIP files.
"""
Georg Brandl62416bc2008-01-07 18:47:44 +00004import struct, os, time, sys, shutil
Martin v. Löwis3a8071a2009-01-24 14:04:33 +00005import binascii, cStringIO, stat
Guido van Rossum32abe6f2000-03-31 17:30:02 +00006
# zlib is optional.  When it is missing, `zlib` is set to None (callers test
# for that) and the CRC-32 routine is taken from binascii instead.
try:
    import zlib  # We may need its compression method
    crc32 = zlib.crc32
except ImportError:
    zlib = None
    crc32 = binascii.crc32
Guido van Rossum32abe6f2000-03-31 17:30:02 +000013
# Names exported by "from zipfile import *".
__all__ = ["BadZipfile", "error", "ZIP_STORED", "ZIP_DEFLATED", "is_zipfile",
           "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile" ]
Skip Montanaro40fc1602001-03-01 04:27:19 +000016
class BadZipfile(Exception):
    """The exception raised by this module for bad or unsupported ZIP data."""


class LargeZipFile(Exception):
    """
    Raised when writing a zipfile, the zipfile requires ZIP64 extensions
    and those extensions are disabled.
    """

error = BadZipfile      # The exception raised by this module
Guido van Rossum32abe6f2000-03-31 17:30:02 +000028
# Sizes above ZIP64_LIMIT make ZipInfo.FileHeader() emit a ZIP64 extra field.
ZIP64_LIMIT = (1 << 31) - 1
# NOTE(review): the next two limits are not used in the visible part of the
# file; presumably the maximum entry count and archive-comment length.
ZIP_FILECOUNT_LIMIT = 1 << 16
ZIP_MAX_COMMENT = (1 << 16) - 1

# constants for Zip file compression methods
ZIP_STORED = 0
ZIP_DEFLATED = 8
# Other ZIP compression methods not supported
37
Martin v. Löwis8c436412008-07-03 12:51:14 +000038# Below are some formats and associated data for reading/writing headers using
39# the struct module. The names and structures of headers/records are those used
40# in the PKWARE description of the ZIP file format:
41# http://www.pkware.com/documents/casestudies/APPNOTE.TXT
42# (URL valid as of January 2008)
Ronald Oussoren143cefb2006-06-15 08:14:18 +000043
Martin v. Löwis8c436412008-07-03 12:51:14 +000044# The "end of central directory" structure, magic number, size, and indices
45# (section V.I in the format document)
Amaury Forgeot d'Arcae6d2b92008-07-11 21:28:25 +000046structEndArchive = "<4s4H2LH"
47stringEndArchive = "PK\005\006"
48sizeEndCentDir = struct.calcsize(structEndArchive)
Martin v. Löwis8c436412008-07-03 12:51:14 +000049
50_ECD_SIGNATURE = 0
51_ECD_DISK_NUMBER = 1
52_ECD_DISK_START = 2
53_ECD_ENTRIES_THIS_DISK = 3
54_ECD_ENTRIES_TOTAL = 4
55_ECD_SIZE = 5
56_ECD_OFFSET = 6
57_ECD_COMMENT_SIZE = 7
58# These last two indices are not part of the structure as defined in the
59# spec, but they are used internally by this module as a convenience
60_ECD_COMMENT = 8
61_ECD_LOCATION = 9
62
63# The "central directory" structure, magic number, size, and indices
64# of entries in the structure (section V.F in the format document)
65structCentralDir = "<4s4B4HL2L5H2L"
Amaury Forgeot d'Arcae6d2b92008-07-11 21:28:25 +000066stringCentralDir = "PK\001\002"
Martin v. Löwis8c436412008-07-03 12:51:14 +000067sizeCentralDir = struct.calcsize(structCentralDir)
68
Fred Drake3e038e52001-02-28 17:56:26 +000069# indexes of entries in the central directory structure
70_CD_SIGNATURE = 0
71_CD_CREATE_VERSION = 1
72_CD_CREATE_SYSTEM = 2
73_CD_EXTRACT_VERSION = 3
Martin v. Löwis8c436412008-07-03 12:51:14 +000074_CD_EXTRACT_SYSTEM = 4
Fred Drake3e038e52001-02-28 17:56:26 +000075_CD_FLAG_BITS = 5
76_CD_COMPRESS_TYPE = 6
77_CD_TIME = 7
78_CD_DATE = 8
79_CD_CRC = 9
80_CD_COMPRESSED_SIZE = 10
81_CD_UNCOMPRESSED_SIZE = 11
82_CD_FILENAME_LENGTH = 12
83_CD_EXTRA_FIELD_LENGTH = 13
84_CD_COMMENT_LENGTH = 14
85_CD_DISK_NUMBER_START = 15
86_CD_INTERNAL_FILE_ATTRIBUTES = 16
87_CD_EXTERNAL_FILE_ATTRIBUTES = 17
88_CD_LOCAL_HEADER_OFFSET = 18
89
Martin v. Löwis8c436412008-07-03 12:51:14 +000090# The "local file header" structure, magic number, size, and indices
91# (section V.A in the format document)
92structFileHeader = "<4s2B4HL2L2H"
Amaury Forgeot d'Arcae6d2b92008-07-11 21:28:25 +000093stringFileHeader = "PK\003\004"
Martin v. Löwis8c436412008-07-03 12:51:14 +000094sizeFileHeader = struct.calcsize(structFileHeader)
95
Fred Drake3e038e52001-02-28 17:56:26 +000096_FH_SIGNATURE = 0
97_FH_EXTRACT_VERSION = 1
Martin v. Löwis8c436412008-07-03 12:51:14 +000098_FH_EXTRACT_SYSTEM = 2
Fred Drake3e038e52001-02-28 17:56:26 +000099_FH_GENERAL_PURPOSE_FLAG_BITS = 3
100_FH_COMPRESSION_METHOD = 4
101_FH_LAST_MOD_TIME = 5
102_FH_LAST_MOD_DATE = 6
103_FH_CRC = 7
104_FH_COMPRESSED_SIZE = 8
105_FH_UNCOMPRESSED_SIZE = 9
106_FH_FILENAME_LENGTH = 10
107_FH_EXTRA_FIELD_LENGTH = 11
108
# The "Zip64 end of central directory locator" structure, magic number, and size
structEndArchive64Locator = "<4sLQL"
stringEndArchive64Locator = "PK\x06\x07"
sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator)
Martin v. Löwis8c436412008-07-03 12:51:14 +0000113
114# The "Zip64 end of central directory" record, magic number, size, and indices
115# (section V.G in the format document)
Amaury Forgeot d'Arcae6d2b92008-07-11 21:28:25 +0000116structEndArchive64 = "<4sQ2H2L4Q"
117stringEndArchive64 = "PK\x06\x06"
118sizeEndCentDir64 = struct.calcsize(structEndArchive64)
Martin v. Löwis8c436412008-07-03 12:51:14 +0000119
120_CD64_SIGNATURE = 0
121_CD64_DIRECTORY_RECSIZE = 1
122_CD64_CREATE_VERSION = 2
123_CD64_EXTRACT_VERSION = 3
124_CD64_DISK_NUMBER = 4
125_CD64_DISK_NUMBER_START = 5
126_CD64_NUMBER_ENTRIES_THIS_DISK = 6
127_CD64_NUMBER_ENTRIES_TOTAL = 7
128_CD64_DIRECTORY_SIZE = 8
129_CD64_OFFSET_START_CENTDIR = 9
130
def is_zipfile(filename):
    """Quickly see if file is a ZIP file by checking the magic number.

    filename is the path of the file to probe.  Returns True when an
    "end of central directory" record is found, otherwise False.  An
    IOError while opening or reading the file also yields False.
    """
    try:
        fpin = open(filename, "rb")
        try:
            endrec = _EndRecData(fpin)
        finally:
            # Release the handle even when the probe itself raises.
            fpin.close()
    except IOError:
        return False
    if endrec:
        return True                 # file has correct magic number
    return False
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000142
def _EndRecData64(fpin, offset, endrec):
    """
    Read the ZIP64 end-of-archive records and use that to update endrec

    fpin is the open archive, offset is the (negative) position of the
    regular "end of central directory" record relative to the end of the
    file, and endrec is the list built by _EndRecData().  endrec is updated
    in place and returned; it is returned unchanged when no ZIP64 records
    are found.  Raises BadZipfile for archives spanning several disks.
    """
    try:
        fpin.seek(offset - sizeEndCentDir64Locator, 2)
    except IOError:
        # The file is too small to hold a ZIP64 locator in front of the
        # end record, so there is nothing to merge.
        return endrec
    data = fpin.read(sizeEndCentDir64Locator)
    if len(data) != sizeEndCentDir64Locator:
        # Short read: unpacking would raise struct.error.
        return endrec
    sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, data)
    if sig != stringEndArchive64Locator:
        return endrec

    if diskno != 0 or disks != 1:
        raise BadZipfile("zipfiles that span multiple disks are not supported")

    # Assume no 'zip64 extensible data'
    fpin.seek(offset - sizeEndCentDir64Locator - sizeEndCentDir64, 2)
    data = fpin.read(sizeEndCentDir64)
    if len(data) != sizeEndCentDir64:
        # Short read: unpacking would raise struct.error.
        return endrec
    sig, sz, create_version, read_version, disk_num, disk_dir, \
            dircount, dircount2, dirsize, diroffset = \
            struct.unpack(structEndArchive64, data)
    if sig != stringEndArchive64:
        return endrec

    # Update the original endrec using data from the ZIP64 record
    endrec[_ECD_SIGNATURE] = sig
    endrec[_ECD_DISK_NUMBER] = disk_num
    endrec[_ECD_DISK_START] = disk_dir
    endrec[_ECD_ENTRIES_THIS_DISK] = dircount
    endrec[_ECD_ENTRIES_TOTAL] = dircount2
    endrec[_ECD_SIZE] = dirsize
    endrec[_ECD_OFFSET] = diroffset
    return endrec
174
175
def _EndRecData(fpin):
    """Return data from the "End of Central Directory" record, or None.

    The data is a list of the nine items in the ZIP "End of central dir"
    record followed by a tenth item, the file seek offset of this record.
    None is returned when no valid record is found, including when the file
    is too short to contain one."""

    # Determine file size
    fpin.seek(0, 2)
    filesize = fpin.tell()

    # Check to see if this is ZIP file with no archive comment (the
    # "end of central directory" structure should be the last item in the
    # file if this is the case).
    try:
        fpin.seek(-sizeEndCentDir, 2)
    except IOError:
        # The file is shorter than the record itself.
        return None
    data = fpin.read()
    if (len(data) == sizeEndCentDir and
            data[0:4] == stringEndArchive and data[-2:] == "\000\000"):
        # the signature is correct and there's no comment, unpack structure
        endrec = list(struct.unpack(structEndArchive, data))

        # Append a blank comment and record start offset
        endrec.append("")
        endrec.append(filesize - sizeEndCentDir)

        # Try to read the "Zip64 end of central directory" structure
        return _EndRecData64(fpin, -sizeEndCentDir, endrec)

    # Either this is not a ZIP file, or it is a ZIP file with an archive
    # comment.  Search the end of the file for the "end of central directory"
    # record signature. The comment is the last item in the ZIP file and may be
    # up to 64K long.  It is assumed that the "end of central directory" magic
    # number does not appear in the comment.
    maxCommentStart = max(filesize - (1 << 16) - sizeEndCentDir, 0)
    fpin.seek(maxCommentStart, 0)
    data = fpin.read()
    start = data.rfind(stringEndArchive)
    if start >= 0:
        # found the magic number; attempt to unpack and interpret
        recData = data[start:start+sizeEndCentDir]
        if len(recData) != sizeEndCentDir:
            # The record is cut off by the end of the file; unpacking it
            # would raise struct.error.
            return None
        endrec = list(struct.unpack(structEndArchive, recData))
        comment = data[start+sizeEndCentDir:]
        # check that comment length is correct
        if endrec[_ECD_COMMENT_SIZE] == len(comment):
            # Append the archive comment and start offset
            endrec.append(comment)
            endrec.append(maxCommentStart + start)

            # Try to read the "Zip64 end of central directory" structure
            return _EndRecData64(fpin, maxCommentStart + start - filesize,
                                 endrec)

    # Unable to find a valid end of central directory structure
    return None
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000229
Fred Drake484d7352000-10-02 21:14:52 +0000230
class ZipInfo (object):
    """Class with attributes describing each file in the ZIP archive."""

    __slots__ = (
            'orig_filename',
            'filename',
            'date_time',
            'compress_type',
            'comment',
            'extra',
            'create_system',
            'create_version',
            'extract_version',
            'reserved',
            'flag_bits',
            'volume',
            'internal_attr',
            'external_attr',
            'header_offset',
            'CRC',
            'compress_size',
            'file_size',
            '_raw_time',
        )

    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
        """Create an entry description with default header values.

        filename is the member name; it is cut at the first null byte and
        os.sep is replaced by "/".  date_time is a
        (year, month, day, hour, min, sec) sequence.
        """
        self.orig_filename = filename   # Original file name in archive

        # Terminate the file name at the first null byte.  Null bytes in file
        # names are used as tricks by viruses in archives.
        null_byte = filename.find(chr(0))
        if null_byte >= 0:
            filename = filename[0:null_byte]
        # This is used to ensure paths in generated ZIP files always use
        # forward slashes as the directory separator, as required by the
        # ZIP format specification.
        if os.sep != "/" and os.sep in filename:
            filename = filename.replace(os.sep, "/")

        self.filename = filename        # Normalized file name
        self.date_time = date_time      # year, month, day, hour, min, sec
        # Standard values:
        self.compress_type = ZIP_STORED # Type of compression for the file
        self.comment = ""               # Comment for each file
        self.extra = ""                 # ZIP extra data
        if sys.platform == 'win32':
            self.create_system = 0          # System which created ZIP archive
        else:
            # Assume everything else is unix-y
            self.create_system = 3          # System which created ZIP archive
        self.create_version = 20        # Version which created ZIP archive
        self.extract_version = 20       # Version needed to extract archive
        self.reserved = 0               # Must be zero
        self.flag_bits = 0              # ZIP flag bits
        self.volume = 0                 # Volume number of file header
        self.internal_attr = 0          # Internal attributes
        self.external_attr = 0          # External file attributes
        # Other attributes are set by class ZipFile:
        # header_offset         Byte offset to the file header
        # CRC                   CRC-32 of the uncompressed file
        # compress_size         Size of the compressed file
        # file_size             Size of the uncompressed file

    def FileHeader(self):
        """Return the per-file header as a string.

        When either size exceeds ZIP64_LIMIT a ZIP64 extra field is appended,
        the 32-bit size fields are written as 0xffffffff, and
        extract_version/create_version on this object are raised to at
        least 45.
        """
        dt = self.date_time
        # Pack the date and time into the two 16-bit header fields.
        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
        if self.flag_bits & 0x08:
            # Set these to zero because we write them after the file data
            CRC = compress_size = file_size = 0
        else:
            CRC = self.CRC
            compress_size = self.compress_size
            file_size = self.file_size

        extra = self.extra

        if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
            # File is larger than what fits into a 4 byte integer,
            # fall back to the ZIP64 extension
            fmt = '<HHQQ'
            extra = extra + struct.pack(fmt,
                    1, struct.calcsize(fmt)-4, file_size, compress_size)
            file_size = 0xffffffff
            compress_size = 0xffffffff
            self.extract_version = max(45, self.extract_version)
            self.create_version = max(45, self.extract_version)

        filename, flag_bits = self._encodeFilenameFlags()
        header = struct.pack(structFileHeader, stringFileHeader,
                 self.extract_version, self.reserved, flag_bits,
                 self.compress_type, dostime, dosdate, CRC,
                 compress_size, file_size,
                 len(filename), len(extra))
        return header + filename + extra

    def _encodeFilenameFlags(self):
        """Return (encoded filename, flag bits) for writing a header.

        A unicode name is encoded as ASCII when possible; otherwise it is
        encoded as UTF-8 and bit 0x800 is set in the returned flags.  A
        non-unicode name is returned unchanged.
        """
        if isinstance(self.filename, unicode):
            try:
                return self.filename.encode('ascii'), self.flag_bits
            except UnicodeEncodeError:
                return self.filename.encode('utf-8'), self.flag_bits | 0x800
        else:
            return self.filename, self.flag_bits

    def _decodeFilename(self):
        """Return the filename, decoded from UTF-8 when flag bit 0x800 is set."""
        if self.flag_bits & 0x800:
            return self.filename.decode('utf-8')
        else:
            return self.filename

    def _decodeExtra(self):
        """Apply ZIP64 sizes from the extra field to this entry.

        Walks the (type, length, payload) records in self.extra.  For a
        type-1 record, 64-bit values replace file_size, compress_size and
        header_offset, in that order, wherever the attribute holds the
        0xffffffff placeholder.  Raises RuntimeError for a type-1 record of
        unexpected length.
        """
        # Try to decode the extra field.
        extra = self.extra
        unpack = struct.unpack
        # A record header is 4 bytes; shorter trailing data cannot be a
        # record and is ignored rather than passed to unpack().
        while len(extra) >= 4:
            tp, ln = unpack('<HH', extra[:4])
            if tp == 1:
                if ln >= 24:
                    counts = unpack('<QQQ', extra[4:28])
                elif ln == 16:
                    counts = unpack('<QQ', extra[4:20])
                elif ln == 8:
                    counts = unpack('<Q', extra[4:12])
                elif ln == 0:
                    counts = ()
                else:
                    raise RuntimeError("Corrupt extra field %s"%(ln,))

                idx = 0

                # ZIP64 extension (large files and/or large archives)
                if self.file_size in (0xffffffffffffffff, 0xffffffff):
                    self.file_size = counts[idx]
                    idx += 1

                if self.compress_size == 0xFFFFFFFF:
                    self.compress_size = counts[idx]
                    idx += 1

                if self.header_offset == 0xffffffff:
                    self.header_offset = counts[idx]
                    idx+=1

            extra = extra[ln+4:]
Tim Petersa608bb22006-06-15 18:06:29 +0000378
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000379
Martin v. Löwisc6d626e2007-02-13 09:49:38 +0000380class _ZipDecrypter:
381 """Class to handle decryption of files stored within a ZIP archive.
382
383 ZIP supports a password-based form of encryption. Even though known
384 plaintext attacks have been found against it, it is still useful
Gregory P. Smithda407232008-01-20 01:32:00 +0000385 to be able to get data out of such a file.
Martin v. Löwisc6d626e2007-02-13 09:49:38 +0000386
387 Usage:
388 zd = _ZipDecrypter(mypwd)
389 plain_char = zd(cypher_char)
390 plain_text = map(zd, cypher_text)
391 """
392
393 def _GenerateCRCTable():
394 """Generate a CRC-32 table.
395
396 ZIP encryption uses the CRC32 one-byte primitive for scrambling some
397 internal keys. We noticed that a direct implementation is faster than
398 relying on binascii.crc32().
399 """
400 poly = 0xedb88320
401 table = [0] * 256
402 for i in range(256):
403 crc = i
404 for j in range(8):
405 if crc & 1:
406 crc = ((crc >> 1) & 0x7FFFFFFF) ^ poly
407 else:
408 crc = ((crc >> 1) & 0x7FFFFFFF)
409 table[i] = crc
410 return table
411 crctable = _GenerateCRCTable()
412
413 def _crc32(self, ch, crc):
414 """Compute the CRC32 primitive on one byte."""
415 return ((crc >> 8) & 0xffffff) ^ self.crctable[(crc ^ ord(ch)) & 0xff]
416
417 def __init__(self, pwd):
418 self.key0 = 305419896
419 self.key1 = 591751049
420 self.key2 = 878082192
421 for p in pwd:
422 self._UpdateKeys(p)
423
424 def _UpdateKeys(self, c):
425 self.key0 = self._crc32(c, self.key0)
426 self.key1 = (self.key1 + (self.key0 & 255)) & 4294967295
427 self.key1 = (self.key1 * 134775813 + 1) & 4294967295
428 self.key2 = self._crc32(chr((self.key1 >> 24) & 255), self.key2)
429
430 def __call__(self, c):
431 """Decrypt a single character."""
432 c = ord(c)
433 k = self.key2 | 2
434 c = c ^ (((k * (k^1)) >> 8) & 255)
435 c = chr(c)
436 self._UpdateKeys(c)
437 return c
438
class ZipExtFile:
    """File-like object for reading an archive member.
       Is returned by ZipFile.open().
    """

    def __init__(self, fileobj, zipinfo, decrypt=None):
        """Wrap fileobj for reading the member described by zipinfo.

        decrypt, when given, is a callable applied to every character read
        from fileobj before any decompression.
        """
        self.fileobj = fileobj
        self.decrypter = decrypt
        self.bytes_read = 0             # raw bytes consumed from fileobj
        self.rawbuffer = ''             # raw data not yet decompressed
        self.readbuffer = ''            # decoded data not yet returned
        self.linebuffer = ''            # data held back by readline()
        # NOTE(review): nothing in this class sets eof to True, so the
        # flush branch in read() does not run.
        self.eof = False
        self.univ_newlines = False
        self.nlSeps = ("\n", )
        self.lastdiscard = ''

        self.compress_type = zipinfo.compress_type
        self.compress_size = zipinfo.compress_size

        self.closed  = False
        self.mode    = "r"
        self.name = zipinfo.filename

        # read from compressed files in 64k blocks
        self.compreadsize = 64*1024
        if self.compress_type == ZIP_DEFLATED:
            self.dc = zlib.decompressobj(-15)

    def set_univ_newlines(self, univ_newlines):
        """Enable or disable recognition of "\\r\\n" and "\\r" line endings."""
        self.univ_newlines = univ_newlines

        # pick line separator char(s) based on universal newlines flag
        self.nlSeps = ("\n", )
        if self.univ_newlines:
            self.nlSeps = ("\r\n", "\r", "\n")

    def __iter__(self):
        """Return self; iteration yields lines via next()."""
        return self

    def next(self):
        """Return the next line, raising StopIteration at end of data."""
        nextline = self.readline()
        if not nextline:
            raise StopIteration()

        return nextline

    def close(self):
        """Mark the object closed; the underlying file object is not touched."""
        self.closed = True

    def _checkfornewline(self):
        """Return (index, length) of the first separator found in linebuffer.

        Separators are tried in the order given by nlSeps.  Returns (-1, -1)
        when the buffer is empty or holds no separator.
        """
        nl, nllen = -1, -1
        if self.linebuffer:
            # ugly check for cases where half of an \r\n pair was
            # read on the last pass, and the \r was discarded.  In this
            # case we just throw away the \n at the start of the buffer.
            if (self.lastdiscard, self.linebuffer[0]) == ('\r','\n'):
                self.linebuffer = self.linebuffer[1:]

            for sep in self.nlSeps:
                nl = self.linebuffer.find(sep)
                if nl >= 0:
                    nllen = len(sep)
                    return nl, nllen

        return nl, nllen

    def readline(self, size = -1):
        """Read a line with approx. size. If size is negative,
           read a whole line.
        """
        if size < 0:
            size = sys.maxint
        elif size == 0:
            return ''

        # check for a newline already in buffer
        nl, nllen = self._checkfornewline()

        if nl >= 0:
            # the next line was already in the buffer
            nl = min(nl, size)
        else:
            # no line break in buffer - try to read more
            size -= len(self.linebuffer)
            while nl < 0 and size > 0:
                buf = self.read(min(size, 100))
                if not buf:
                    break
                self.linebuffer += buf
                size -= len(buf)

                # check for a newline in buffer
                nl, nllen = self._checkfornewline()

            # we either ran out of bytes in the file, or
            # met the specified size limit without finding a newline,
            # so return current buffer
            if nl < 0:
                s = self.linebuffer
                self.linebuffer = ''
                return s

        buf = self.linebuffer[:nl]
        self.lastdiscard = self.linebuffer[nl:nl + nllen]
        self.linebuffer = self.linebuffer[nl + nllen:]

        # line is always returned with \n as newline char (except possibly
        # for a final incomplete line in the file, which is handled above).
        return buf + "\n"

    def readlines(self, sizehint = -1):
        """Return a list with all (following) lines. The sizehint parameter
        is ignored in this implementation.
        """
        result = []
        while True:
            line = self.readline()
            if not line: break
            result.append(line)
        return result

    def read(self, size = None):
        """Return up to size characters of member data, or all that is
        buffered after this read when size is None.
        """
        # act like file() obj and return empty string if size is 0
        if size == 0:
            return ''

        # determine read size
        bytesToRead = self.compress_size - self.bytes_read

        # adjust read size for encrypted files since the first 12 bytes
        # are for the encryption/password information
        if self.decrypter is not None:
            bytesToRead -= 12

        if size is not None and size >= 0:
            if self.compress_type == ZIP_STORED:
                lr = len(self.readbuffer)
                bytesToRead = min(bytesToRead, size - lr)
            elif self.compress_type == ZIP_DEFLATED:
                if len(self.readbuffer) > size:
                    # the user has requested fewer bytes than we've already
                    # pulled through the decompressor; don't read any more
                    bytesToRead = 0
                else:
                    # user will use up the buffer, so read some more
                    lr = len(self.rawbuffer)
                    bytesToRead = min(bytesToRead, self.compreadsize - lr)

        # avoid reading past end of file contents
        if bytesToRead + self.bytes_read > self.compress_size:
            bytesToRead = self.compress_size - self.bytes_read

        # try to read from file (if necessary)
        if bytesToRead > 0:
            chunk = self.fileobj.read(bytesToRead)
            self.bytes_read += len(chunk)
            self.rawbuffer += chunk

            # handle contents of raw buffer
            if self.rawbuffer:
                newdata = self.rawbuffer
                self.rawbuffer = ''

                # decrypt new data if we were given an object to handle that
                if newdata and self.decrypter is not None:
                    newdata = ''.join(map(self.decrypter, newdata))

                # decompress newly read data if necessary
                if newdata and self.compress_type == ZIP_DEFLATED:
                    newdata = self.dc.decompress(newdata)
                    self.rawbuffer = self.dc.unconsumed_tail
                    if self.eof and len(self.rawbuffer) == 0:
                        # we're out of raw bytes (both from the file and
                        # the local buffer); flush just to make sure the
                        # decompressor is done
                        newdata += self.dc.flush()
                        # prevent decompressor from being used again
                        self.dc = None

                self.readbuffer += newdata


        # return what the user asked for
        if size is None or len(self.readbuffer) <= size:
            result = self.readbuffer
            self.readbuffer = ''
        else:
            result = self.readbuffer[:size]
            self.readbuffer = self.readbuffer[size:]

        return result
Tim Petersea5962f2007-03-12 18:07:52 +0000631
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000632
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000633class ZipFile:
Tim Petersa19a1682001-03-29 04:36:09 +0000634 """ Class with methods to open, read, write, close, list zip files.
635
Martin v. Löwis8c436412008-07-03 12:51:14 +0000636 z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=False)
Tim Petersa19a1682001-03-29 04:36:09 +0000637
Fred Drake3d9091e2001-03-26 15:49:24 +0000638 file: Either the path to the file, or a file-like object.
639 If it is a path, the file will be opened and closed by ZipFile.
640 mode: The mode can be either read "r", write "w" or append "a".
641 compression: ZIP_STORED (no compression) or ZIP_DEFLATED (requires zlib).
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000642 allowZip64: if True ZipFile will create files with ZIP64 extensions when
643 needed, otherwise it will raise an exception when this would
644 be necessary.
645
Fred Drake3d9091e2001-03-26 15:49:24 +0000646 """
Fred Drake484d7352000-10-02 21:14:52 +0000647
Fred Drake90eac282001-02-28 05:29:34 +0000648 fp = None # Set here since __del__ checks it
649
def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=False):
    """Open the ZIP file with mode read "r", write "w" or append "a".

    file is a path or a file-like object; a path is opened here.  Raises
    RuntimeError for an unknown mode, for an unsupported compression
    method, or for ZIP_DEFLATED when zlib is missing.  In mode "a" a path
    that cannot be opened for update is opened for writing instead.
    """
    if mode not in ("r", "w", "a"):
        raise RuntimeError('ZipFile() requires mode "r", "w", or "a"')

    if compression == ZIP_STORED:
        pass
    elif compression == ZIP_DEFLATED:
        if not zlib:
            raise RuntimeError(
                  "Compression requires the (missing) zlib module")
    else:
        raise RuntimeError("That compression method is not supported")

    self._allowZip64 = allowZip64
    self._didModify = False
    self.debug = 0  # Level of printing: 0 through 3
    self.NameToInfo = {}    # Find file info given name
    self.filelist = []      # List of ZipInfo instances for archive
    self.compression = compression  # Method of compression
    self.mode = key = mode.replace('b', '')[0]
    self.pwd = None
    self.comment = ''

    # Check if we were passed a file-like object
    if isinstance(file, basestring):
        self._filePassed = 0
        self.filename = file
        modeDict = {'r' : 'rb', 'w': 'wb', 'a' : 'r+b'}
        try:
            self.fp = open(file, modeDict[mode])
        except IOError:
            if mode == 'a':
                # "r+b" failed; fall back to opening the path for writing.
                mode = key = 'w'
                self.fp = open(file, modeDict[mode])
            else:
                raise
    else:
        self._filePassed = 1
        self.fp = file
        self.filename = getattr(file, 'name', None)

    if key == 'r':
        self._GetContents()
    elif key == 'w':
        pass
    elif key == 'a':
        try:                        # See if file is a zip file
            self._RealGetContents()
            # seek to start of directory and overwrite
            self.fp.seek(self.start_dir, 0)
        except BadZipfile:          # file is not a zip file, just append
            self.fp.seek(0, 2)
    else:
        # Not reachable after the mode check above; kept as a safeguard.
        if not self._filePassed:
            self.fp.close()
            self.fp = None
        raise RuntimeError('Mode must be "r", "w" or "a"')
708
def _GetContents(self):
    """Load the archive directory via _RealGetContents().

    If that raises BadZipfile and this object opened the file itself
    (self._filePassed is false), the file is closed and self.fp is reset
    to None before the exception is re-raised to the caller.
    """
    try:
        self._RealGetContents()
    except BadZipfile:
        owns_file = not self._filePassed
        if owns_file:
            # We opened it, so we are responsible for releasing it.
            self.fp.close()
            self.fp = None
        raise
719
720 def _RealGetContents(self):
Fred Drake484d7352000-10-02 21:14:52 +0000721 """Read in the table of contents for the ZIP file."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000722 fp = self.fp
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000723 endrec = _EndRecData(fp)
724 if not endrec:
725 raise BadZipfile, "File is not a zip file"
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000726 if self.debug > 1:
727 print endrec
Martin v. Löwis8c436412008-07-03 12:51:14 +0000728 size_cd = endrec[_ECD_SIZE] # bytes in central directory
729 offset_cd = endrec[_ECD_OFFSET] # offset of central directory
730 self.comment = endrec[_ECD_COMMENT] # archive comment
731
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000732 # "concat" is zero, unless zip was concatenated to another file
Martin v. Löwis8c436412008-07-03 12:51:14 +0000733 concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
Antoine Pitrouebcd0ce2008-09-05 23:30:23 +0000734 if endrec[_ECD_SIGNATURE] == stringEndArchive64:
735 # If Zip64 extension structures are present, account for them
Martin v. Löwis8c436412008-07-03 12:51:14 +0000736 concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)
737
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000738 if self.debug > 2:
Martin v. Löwis8c436412008-07-03 12:51:14 +0000739 inferred = concat + offset_cd
740 print "given, inferred, offset", offset_cd, inferred, concat
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000741 # self.start_dir: Position of start of central directory
742 self.start_dir = offset_cd + concat
743 fp.seek(self.start_dir, 0)
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000744 data = fp.read(size_cd)
745 fp = cStringIO.StringIO(data)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000746 total = 0
747 while total < size_cd:
Martin v. Löwis8c436412008-07-03 12:51:14 +0000748 centdir = fp.read(sizeCentralDir)
Amaury Forgeot d'Arcae6d2b92008-07-11 21:28:25 +0000749 if centdir[0:4] != stringCentralDir:
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000750 raise BadZipfile, "Bad magic number for central directory"
751 centdir = struct.unpack(structCentralDir, centdir)
752 if self.debug > 2:
753 print centdir
Fred Drake3e038e52001-02-28 17:56:26 +0000754 filename = fp.read(centdir[_CD_FILENAME_LENGTH])
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000755 # Create ZipInfo instance to store file information
756 x = ZipInfo(filename)
Fred Drake3e038e52001-02-28 17:56:26 +0000757 x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
758 x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000759 x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000760 (x.create_version, x.create_system, x.extract_version, x.reserved,
761 x.flag_bits, x.compress_type, t, d,
762 x.CRC, x.compress_size, x.file_size) = centdir[1:12]
763 x.volume, x.internal_attr, x.external_attr = centdir[15:18]
764 # Convert date/time code to (year, month, day, hour, min, sec)
Gregory P. Smith0c63fc22008-01-20 01:21:03 +0000765 x._raw_time = t
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000766 x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
Fred Drake414ca662000-06-13 18:49:53 +0000767 t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000768
769 x._decodeExtra()
770 x.header_offset = x.header_offset + concat
Martin v. Löwis471617d2008-05-05 17:16:58 +0000771 x.filename = x._decodeFilename()
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000772 self.filelist.append(x)
773 self.NameToInfo[x.filename] = x
Martin v. Löwis8c436412008-07-03 12:51:14 +0000774
775 # update total bytes read from central directory
776 total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH]
777 + centdir[_CD_EXTRA_FIELD_LENGTH]
778 + centdir[_CD_COMMENT_LENGTH])
779
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000780 if self.debug > 2:
781 print "total", total
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000782
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000783
def namelist(self):
    """Return the names of all archive members, in self.filelist order."""
    return [member.filename for member in self.filelist]
790
def infolist(self):
    """Return the list of ZipInfo instances describing the archive
    members.

    This is self.filelist itself, not a copy."""
    members = self.filelist
    return members
795
def printdir(self):
    """Print a table of contents for the zip file to standard output.

    One header line is followed by one line per member giving its name,
    modification time and uncompressed size."""
    header = "%-46s %19s %12s" % ("File Name", "Modified ", "Size")
    print(header)
    for member in self.filelist:
        stamp = "%d-%02d-%02d %02d:%02d:%02d" % member.date_time[:6]
        row = "%-46s %s %12d" % (member.filename, stamp, member.file_size)
        print(row)
802
def testzip(self):
    """Read all the files and check the CRC.

    Returns the name of the first member for which reading raises
    BadZipfile, or None when every member could be read to the end."""
    chunk_size = 2 ** 20
    for member in self.filelist:
        try:
            # Read by chunks, to avoid an OverflowError or a
            # MemoryError with very large embedded files.
            stream = self.open(member.filename, "r")
            piece = stream.read(chunk_size)
            while piece:                        # Check CRC-32
                piece = stream.read(chunk_size)
        except BadZipfile:
            return member.filename
815
def getinfo(self, name):
    """Return the ZipInfo instance registered under 'name'.

    Raises KeyError when self.NameToInfo has no entry for the name."""
    found = self.NameToInfo.get(name)
    if found is not None:
        return found
    raise KeyError(
        'There is no item named %r in the archive' % name)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000824
def setpassword(self, pwd):
    """Remember 'pwd' as the default password for encrypted members.

    open() falls back to this value when it is called without a
    password of its own."""
    self.pwd = pwd
828
def read(self, name, pwd=None):
    """Return the complete contents of member 'name' (as a string).

    The member is opened in mode "r" through self.open(), passing
    'pwd' along, and read in a single call."""
    member_file = self.open(name, "r", pwd)
    return member_file.read()
832
def open(self, name, mode="r", pwd=None):
    """Return file-like object for 'name'.

    'name' is a member name or a ZipInfo instance.  'mode' must be "r",
    "U" or "rU"; when it contains "U" universal newlines are enabled on
    the returned ZipExtFile.  'pwd' is the password for an encrypted
    member and falls back to self.pwd when not given.

    Raises RuntimeError for an invalid mode, a closed archive, a missing
    password or a password that fails the check byte; BadZipfile for a
    bad or truncated local file header or a file name that differs from
    the central directory; KeyError (from getinfo) for an unknown name.
    """
    if mode not in ("r", "U", "rU"):
        raise RuntimeError('open() requires mode "r", "U", or "rU"')
    if not self.fp:
        raise RuntimeError(
              "Attempt to read ZIP archive that was already closed")

    # Only open a new file for instances where we were not
    # given a file object in the constructor
    if self._filePassed:
        zef_file = self.fp
    else:
        zef_file = open(self.filename, 'rb')

    succeeded = False
    try:
        # Make sure we have an info object
        if isinstance(name, ZipInfo):
            # 'name' is already an info object
            zinfo = name
        else:
            # Get info object for name
            zinfo = self.getinfo(name)

        zef_file.seek(zinfo.header_offset, 0)

        # Skip the file header:
        fheader = zef_file.read(sizeFileHeader)
        if fheader[0:4] != stringFileHeader:
            raise BadZipfile("Bad magic number for file header")
        if len(fheader) != sizeFileHeader:
            # Report a short header as a bad archive instead of letting
            # struct.unpack() raise struct.error.
            raise BadZipfile("Truncated file header")

        fheader = struct.unpack(structFileHeader, fheader)
        fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
        if fheader[_FH_EXTRA_FIELD_LENGTH]:
            zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])

        if fname != zinfo.orig_filename:
            raise BadZipfile(
                  'File name in directory "%s" and header "%s" differ.' % (
                      zinfo.orig_filename, fname))

        # check for encrypted flag & handle password
        is_encrypted = zinfo.flag_bits & 0x1
        zd = None
        if is_encrypted:
            if not pwd:
                pwd = self.pwd
            if not pwd:
                raise RuntimeError("File %s is encrypted, "
                      "password required for extraction" % name)

            zd = _ZipDecrypter(pwd)
            # The first 12 bytes in the cypher stream is an encryption header
            #  used to strengthen the algorithm. The first 11 bytes are
            #  completely random, while the 12th contains the MSB of the CRC,
            #  or the MSB of the file time depending on the header type
            #  and is used to check the correctness of the password.
            bytes = zef_file.read(12)
            h = map(zd, bytes[0:12])
            if zinfo.flag_bits & 0x8:
                # compare against the file type from extended local headers
                check_byte = (zinfo._raw_time >> 8) & 0xff
            else:
                # compare against the CRC otherwise
                check_byte = (zinfo.CRC >> 24) & 0xff
            if ord(h[11]) != check_byte:
                raise RuntimeError("Bad password for file", name)

        # build and return a ZipExtFile
        if zd is None:
            zef = ZipExtFile(zef_file, zinfo)
        else:
            zef = ZipExtFile(zef_file, zinfo, zd)

        # set universal newlines on ZipExtFile if necessary
        if "U" in mode:
            zef.set_univ_newlines(True)
        succeeded = True
        return zef
    finally:
        # On any failure, release the handle we opened ourselves; a file
        # object supplied by the caller is never closed here.
        if not succeeded and not self._filePassed:
            zef_file.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000910
def extract(self, member, path=None, pwd=None):
    """Extract one member and return the value of _extract_member().

       `member' may be a member name or a ZipInfo object; a name is
       resolved with getinfo().  The member is extracted below `path',
       which defaults to the current working directory.  `pwd' is passed
       through unchanged.
    """
    if isinstance(member, ZipInfo):
        zinfo = member
    else:
        zinfo = self.getinfo(member)

    if path is None:
        destination = os.getcwd()
    else:
        destination = path

    return self._extract_member(zinfo, destination, pwd)
924
def extractall(self, path=None, members=None, pwd=None):
    """Extract several members by calling extract() on each in turn.

       `members' is the list of members to extract; when it is None
       everything returned by namelist() is extracted.  `path' and `pwd'
       are handed to extract() unchanged.
    """
    if members is None:
        selected = self.namelist()
    else:
        selected = members

    for entry in selected:
        self.extract(entry, path, pwd)
936
def _extract_member(self, member, targetpath, pwd):
    """Extract the ZipInfo object 'member' to a physical
       file on the path targetpath.

       Missing parent directories are created.  A member whose name ends
       with '/' is treated as a directory: it is created if it does not
       exist yet and no data is written.  Returns the normalized path of
       the extracted file or directory.
    """
    # NOTE(review): member.filename comes from the archive and ".."
    # components are not filtered here, so after normpath() the target
    # can end up outside 'targetpath'.  Callers extracting untrusted
    # archives should validate member names first.

    # build the destination pathname, replacing
    # forward slashes to platform specific separators.
    if targetpath[-1:] in (os.path.sep, os.path.altsep):
        targetpath = targetpath[:-1]

    # don't include leading "/" from file name if present
    if member.filename[0] == '/':
        targetpath = os.path.join(targetpath, member.filename[1:])
    else:
        targetpath = os.path.join(targetpath, member.filename)

    targetpath = os.path.normpath(targetpath)

    # Create all upper directories if necessary.
    upperdirs = os.path.dirname(targetpath)
    if upperdirs and not os.path.exists(upperdirs):
        os.makedirs(upperdirs)

    if member.filename[-1] == '/':
        # The directory may already exist, e.g. from an earlier
        # extraction or because a contained file was extracted first.
        if not os.path.isdir(targetpath):
            os.mkdir(targetpath)
        return targetpath

    source = self.open(member, pwd=pwd)
    try:
        target = open(targetpath, "wb")
        try:
            shutil.copyfileobj(source, target)
        finally:
            target.close()
    finally:
        # Close both ends even when the copy fails part-way.
        source.close()

    return targetpath
970
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000971 def _writecheck(self, zinfo):
Fred Drake484d7352000-10-02 21:14:52 +0000972 """Check for errors before writing a file to the archive."""
Raymond Hettinger54f02222002-06-01 14:18:47 +0000973 if zinfo.filename in self.NameToInfo:
Tim Peterse1190062001-01-15 03:34:38 +0000974 if self.debug: # Warning for duplicate names
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000975 print "Duplicate name:", zinfo.filename
976 if self.mode not in ("w", "a"):
977 raise RuntimeError, 'write() requires mode "w" or "a"'
978 if not self.fp:
979 raise RuntimeError, \
Fred Drake5db246d2000-09-29 20:44:48 +0000980 "Attempt to write ZIP archive that was already closed"
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000981 if zinfo.compress_type == ZIP_DEFLATED and not zlib:
982 raise RuntimeError, \
Fred Drake5db246d2000-09-29 20:44:48 +0000983 "Compression requires the (missing) zlib module"
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000984 if zinfo.compress_type not in (ZIP_STORED, ZIP_DEFLATED):
985 raise RuntimeError, \
Fred Drake5db246d2000-09-29 20:44:48 +0000986 "That compression method is not supported"
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000987 if zinfo.file_size > ZIP64_LIMIT:
988 if not self._allowZip64:
989 raise LargeZipFile("Filesize would require ZIP64 extensions")
990 if zinfo.header_offset > ZIP64_LIMIT:
991 if not self._allowZip64:
992 raise LargeZipFile("Zipfile size would require ZIP64 extensions")
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000993
994 def write(self, filename, arcname=None, compress_type=None):
Fred Drake484d7352000-10-02 21:14:52 +0000995 """Put the bytes from filename into the archive under the name
996 arcname."""
Georg Brandl4b3ab6f2007-07-12 09:59:22 +0000997 if not self.fp:
998 raise RuntimeError(
999 "Attempt to write to ZIP archive that was already closed")
1000
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001001 st = os.stat(filename)
Martin v. Löwis3a8071a2009-01-24 14:04:33 +00001002 isdir = stat.S_ISDIR(st.st_mode)
Raymond Hettinger32200ae2002-06-01 19:51:15 +00001003 mtime = time.localtime(st.st_mtime)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001004 date_time = mtime[0:6]
1005 # Create ZipInfo instance to store file information
1006 if arcname is None:
Georg Brandl8f7c54e2006-02-20 08:40:38 +00001007 arcname = filename
1008 arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
1009 while arcname[0] in (os.sep, os.altsep):
1010 arcname = arcname[1:]
Martin v. Löwis3a8071a2009-01-24 14:04:33 +00001011 if isdir:
1012 arcname += '/'
Georg Brandl8f7c54e2006-02-20 08:40:38 +00001013 zinfo = ZipInfo(arcname, date_time)
Andrew M. Kuchling55430212004-07-10 15:40:29 +00001014 zinfo.external_attr = (st[0] & 0xFFFF) << 16L # Unix attributes
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001015 if compress_type is None:
Tim Peterse1190062001-01-15 03:34:38 +00001016 zinfo.compress_type = self.compression
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001017 else:
Tim Peterse1190062001-01-15 03:34:38 +00001018 zinfo.compress_type = compress_type
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001019
1020 zinfo.file_size = st.st_size
Finn Bock03a3bb82001-09-05 18:40:33 +00001021 zinfo.flag_bits = 0x00
Tim Peterse1190062001-01-15 03:34:38 +00001022 zinfo.header_offset = self.fp.tell() # Start of header bytes
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001023
1024 self._writecheck(zinfo)
1025 self._didModify = True
Martin v. Löwis3a8071a2009-01-24 14:04:33 +00001026
1027 if isdir:
1028 zinfo.file_size = 0
1029 zinfo.compress_size = 0
1030 zinfo.CRC = 0
1031 self.filelist.append(zinfo)
1032 self.NameToInfo[zinfo.filename] = zinfo
1033 self.fp.write(zinfo.FileHeader())
1034 return
1035
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001036 fp = open(filename, "rb")
Finn Bock03a3bb82001-09-05 18:40:33 +00001037 # Must overwrite CRC and sizes with correct data later
1038 zinfo.CRC = CRC = 0
1039 zinfo.compress_size = compress_size = 0
1040 zinfo.file_size = file_size = 0
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001041 self.fp.write(zinfo.FileHeader())
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001042 if zinfo.compress_type == ZIP_DEFLATED:
1043 cmpr = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
1044 zlib.DEFLATED, -15)
1045 else:
1046 cmpr = None
1047 while 1:
1048 buf = fp.read(1024 * 8)
1049 if not buf:
1050 break
1051 file_size = file_size + len(buf)
Gregory P. Smithbf02e3b2008-03-19 03:14:41 +00001052 CRC = crc32(buf, CRC) & 0xffffffff
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001053 if cmpr:
1054 buf = cmpr.compress(buf)
1055 compress_size = compress_size + len(buf)
1056 self.fp.write(buf)
1057 fp.close()
1058 if cmpr:
1059 buf = cmpr.flush()
1060 compress_size = compress_size + len(buf)
1061 self.fp.write(buf)
1062 zinfo.compress_size = compress_size
1063 else:
1064 zinfo.compress_size = file_size
1065 zinfo.CRC = CRC
1066 zinfo.file_size = file_size
Finn Bock03a3bb82001-09-05 18:40:33 +00001067 # Seek backwards and write CRC and file sizes
Tim Petersb64bec32001-09-18 02:26:39 +00001068 position = self.fp.tell() # Preserve current position in file
Finn Bock03a3bb82001-09-05 18:40:33 +00001069 self.fp.seek(zinfo.header_offset + 14, 0)
Gregory P. Smithbf02e3b2008-03-19 03:14:41 +00001070 self.fp.write(struct.pack("<LLL", zinfo.CRC, zinfo.compress_size,
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001071 zinfo.file_size))
Finn Bock03a3bb82001-09-05 18:40:33 +00001072 self.fp.seek(position, 0)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001073 self.filelist.append(zinfo)
1074 self.NameToInfo[zinfo.filename] = zinfo
1075
Just van Rossumb083cb32002-12-12 12:23:32 +00001076 def writestr(self, zinfo_or_arcname, bytes):
Fred Drake484d7352000-10-02 21:14:52 +00001077 """Write a file into the archive. The contents is the string
Just van Rossumb083cb32002-12-12 12:23:32 +00001078 'bytes'. 'zinfo_or_arcname' is either a ZipInfo instance or
1079 the name of the file in the archive."""
1080 if not isinstance(zinfo_or_arcname, ZipInfo):
1081 zinfo = ZipInfo(filename=zinfo_or_arcname,
Raymond Hettinger351e1a32008-01-14 22:58:05 +00001082 date_time=time.localtime(time.time())[:6])
Just van Rossumb083cb32002-12-12 12:23:32 +00001083 zinfo.compress_type = self.compression
Antoine Pitrou5fdfa3e2008-07-25 19:42:26 +00001084 zinfo.external_attr = 0600 << 16
Just van Rossumb083cb32002-12-12 12:23:32 +00001085 else:
1086 zinfo = zinfo_or_arcname
Georg Brandl4b3ab6f2007-07-12 09:59:22 +00001087
1088 if not self.fp:
1089 raise RuntimeError(
1090 "Attempt to write to ZIP archive that was already closed")
1091
Tim Peterse1190062001-01-15 03:34:38 +00001092 zinfo.file_size = len(bytes) # Uncompressed size
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001093 zinfo.header_offset = self.fp.tell() # Start of header bytes
1094 self._writecheck(zinfo)
1095 self._didModify = True
Gregory P. Smithbf02e3b2008-03-19 03:14:41 +00001096 zinfo.CRC = crc32(bytes) & 0xffffffff # CRC-32 checksum
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001097 if zinfo.compress_type == ZIP_DEFLATED:
1098 co = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
1099 zlib.DEFLATED, -15)
1100 bytes = co.compress(bytes) + co.flush()
Tim Peterse1190062001-01-15 03:34:38 +00001101 zinfo.compress_size = len(bytes) # Compressed size
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001102 else:
1103 zinfo.compress_size = zinfo.file_size
Tim Peterse1190062001-01-15 03:34:38 +00001104 zinfo.header_offset = self.fp.tell() # Start of header bytes
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001105 self.fp.write(zinfo.FileHeader())
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001106 self.fp.write(bytes)
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001107 self.fp.flush()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001108 if zinfo.flag_bits & 0x08:
Tim Peterse1190062001-01-15 03:34:38 +00001109 # Write CRC and file sizes after the file data
Brett Cannonff450f72004-07-10 19:09:20 +00001110 self.fp.write(struct.pack("<lLL", zinfo.CRC, zinfo.compress_size,
Tim Peterse1190062001-01-15 03:34:38 +00001111 zinfo.file_size))
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001112 self.filelist.append(zinfo)
1113 self.NameToInfo[zinfo.filename] = zinfo
1114
def __del__(self):
    """Finalizer: delegate to close() in case the user never called it."""
    self.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001118
1119 def close(self):
Fred Drake484d7352000-10-02 21:14:52 +00001120 """Close the file, and for mode "w" and "a" write the ending
1121 records."""
Tim Petersd15f8bb2001-11-28 23:16:40 +00001122 if self.fp is None:
1123 return
Tim Petersa608bb22006-06-15 18:06:29 +00001124
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001125 if self.mode in ("w", "a") and self._didModify: # write ending records
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001126 count = 0
1127 pos1 = self.fp.tell()
Tim Peterse1190062001-01-15 03:34:38 +00001128 for zinfo in self.filelist: # write central directory
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001129 count = count + 1
1130 dt = zinfo.date_time
1131 dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
Tim Peters3caca232001-12-06 06:23:26 +00001132 dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001133 extra = []
1134 if zinfo.file_size > ZIP64_LIMIT \
1135 or zinfo.compress_size > ZIP64_LIMIT:
1136 extra.append(zinfo.file_size)
1137 extra.append(zinfo.compress_size)
Martin v. Löwis8c436412008-07-03 12:51:14 +00001138 file_size = 0xffffffff
1139 compress_size = 0xffffffff
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001140 else:
1141 file_size = zinfo.file_size
1142 compress_size = zinfo.compress_size
1143
1144 if zinfo.header_offset > ZIP64_LIMIT:
1145 extra.append(zinfo.header_offset)
Martin v. Löwis8c436412008-07-03 12:51:14 +00001146 header_offset = 0xffffffffL
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001147 else:
1148 header_offset = zinfo.header_offset
1149
1150 extra_data = zinfo.extra
1151 if extra:
1152 # Append a ZIP64 field to the extra's
1153 extra_data = struct.pack(
Gregory P. Smithb89a0962008-03-19 01:46:10 +00001154 '<HH' + 'Q'*len(extra),
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001155 1, 8*len(extra), *extra) + extra_data
Tim Petersa608bb22006-06-15 18:06:29 +00001156
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001157 extract_version = max(45, zinfo.extract_version)
1158 create_version = max(45, zinfo.create_version)
1159 else:
1160 extract_version = zinfo.extract_version
1161 create_version = zinfo.create_version
1162
Gregory P. Smithbf02e3b2008-03-19 03:14:41 +00001163 try:
Martin v. Löwis471617d2008-05-05 17:16:58 +00001164 filename, flag_bits = zinfo._encodeFilenameFlags()
Gregory P. Smithbf02e3b2008-03-19 03:14:41 +00001165 centdir = struct.pack(structCentralDir,
Amaury Forgeot d'Arcae6d2b92008-07-11 21:28:25 +00001166 stringCentralDir, create_version,
Gregory P. Smithbf02e3b2008-03-19 03:14:41 +00001167 zinfo.create_system, extract_version, zinfo.reserved,
Martin v. Löwis471617d2008-05-05 17:16:58 +00001168 flag_bits, zinfo.compress_type, dostime, dosdate,
Gregory P. Smithbf02e3b2008-03-19 03:14:41 +00001169 zinfo.CRC, compress_size, file_size,
Martin v. Löwis471617d2008-05-05 17:16:58 +00001170 len(filename), len(extra_data), len(zinfo.comment),
Gregory P. Smithbf02e3b2008-03-19 03:14:41 +00001171 0, zinfo.internal_attr, zinfo.external_attr,
1172 header_offset)
1173 except DeprecationWarning:
1174 print >>sys.stderr, (structCentralDir,
1175 stringCentralDir, create_version,
1176 zinfo.create_system, extract_version, zinfo.reserved,
1177 zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
1178 zinfo.CRC, compress_size, file_size,
1179 len(zinfo.filename), len(extra_data), len(zinfo.comment),
1180 0, zinfo.internal_attr, zinfo.external_attr,
1181 header_offset)
1182 raise
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001183 self.fp.write(centdir)
Martin v. Löwis471617d2008-05-05 17:16:58 +00001184 self.fp.write(filename)
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001185 self.fp.write(extra_data)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001186 self.fp.write(zinfo.comment)
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001187
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001188 pos2 = self.fp.tell()
1189 # Write end-of-zip-archive record
Amaury Forgeot d'Arc0293c802009-01-17 16:46:35 +00001190 centDirCount = count
1191 centDirSize = pos2 - pos1
Martin v. Löwis8c436412008-07-03 12:51:14 +00001192 centDirOffset = pos1
Amaury Forgeot d'Arc0293c802009-01-17 16:46:35 +00001193 if (centDirCount >= ZIP_FILECOUNT_LIMIT or
1194 centDirOffset > ZIP64_LIMIT or
1195 centDirSize > ZIP64_LIMIT):
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001196 # Need to write the ZIP64 end-of-archive records
1197 zip64endrec = struct.pack(
Amaury Forgeot d'Arcae6d2b92008-07-11 21:28:25 +00001198 structEndArchive64, stringEndArchive64,
Amaury Forgeot d'Arc0293c802009-01-17 16:46:35 +00001199 44, 45, 45, 0, 0, centDirCount, centDirCount,
1200 centDirSize, centDirOffset)
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001201 self.fp.write(zip64endrec)
1202
1203 zip64locrec = struct.pack(
Amaury Forgeot d'Arcae6d2b92008-07-11 21:28:25 +00001204 structEndArchive64Locator,
1205 stringEndArchive64Locator, 0, pos2, 1)
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001206 self.fp.write(zip64locrec)
Amaury Forgeot d'Arc0293c802009-01-17 16:46:35 +00001207 centDirCount = min(centDirCount, 0xFFFF)
1208 centDirSize = min(centDirSize, 0xFFFFFFFF)
1209 centDirOffset = min(centDirOffset, 0xFFFFFFFF)
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001210
Martin v. Löwis8c436412008-07-03 12:51:14 +00001211 # check for valid comment length
1212 if len(self.comment) >= ZIP_MAX_COMMENT:
1213 if self.debug > 0:
1214 msg = 'Archive comment is too long; truncating to %d bytes' \
1215 % ZIP_MAX_COMMENT
1216 self.comment = self.comment[:ZIP_MAX_COMMENT]
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001217
Amaury Forgeot d'Arcae6d2b92008-07-11 21:28:25 +00001218 endrec = struct.pack(structEndArchive, stringEndArchive,
Amaury Forgeot d'Arc0293c802009-01-17 16:46:35 +00001219 0, 0, centDirCount, centDirCount,
1220 centDirSize, centDirOffset, len(self.comment))
Martin v. Löwis8c436412008-07-03 12:51:14 +00001221 self.fp.write(endrec)
1222 self.fp.write(self.comment)
Guido van Rossumf85af612001-04-14 16:45:14 +00001223 self.fp.flush()
Martin v. Löwis8c436412008-07-03 12:51:14 +00001224
Fred Drake3d9091e2001-03-26 15:49:24 +00001225 if not self._filePassed:
1226 self.fp.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001227 self.fp = None
1228
1229
1230class PyZipFile(ZipFile):
Fred Drake484d7352000-10-02 21:14:52 +00001231 """Class to create ZIP archives with Python library files and packages."""
1232
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001233 def writepy(self, pathname, basename = ""):
1234 """Add all files from "pathname" to the ZIP archive.
1235
Fred Drake484d7352000-10-02 21:14:52 +00001236 If pathname is a package directory, search the directory and
1237 all package subdirectories recursively for all *.py and enter
1238 the modules into the archive. If pathname is a plain
1239 directory, listdir *.py and enter all modules. Else, pathname
1240 must be a Python *.py file and the module will be put into the
1241 archive. Added modules are always module.pyo or module.pyc.
1242 This method will compile the module.py into module.pyc if
1243 necessary.
1244 """
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001245 dir, name = os.path.split(pathname)
1246 if os.path.isdir(pathname):
1247 initname = os.path.join(pathname, "__init__.py")
1248 if os.path.isfile(initname):
1249 # This is a package directory, add it
1250 if basename:
1251 basename = "%s/%s" % (basename, name)
1252 else:
1253 basename = name
1254 if self.debug:
1255 print "Adding package in", pathname, "as", basename
1256 fname, arcname = self._get_codename(initname[0:-3], basename)
1257 if self.debug:
1258 print "Adding", arcname
1259 self.write(fname, arcname)
1260 dirlist = os.listdir(pathname)
1261 dirlist.remove("__init__.py")
1262 # Add all *.py files and package subdirectories
1263 for filename in dirlist:
1264 path = os.path.join(pathname, filename)
1265 root, ext = os.path.splitext(filename)
1266 if os.path.isdir(path):
1267 if os.path.isfile(os.path.join(path, "__init__.py")):
1268 # This is a package directory, add it
1269 self.writepy(path, basename) # Recursive call
1270 elif ext == ".py":
1271 fname, arcname = self._get_codename(path[0:-3],
1272 basename)
1273 if self.debug:
1274 print "Adding", arcname
1275 self.write(fname, arcname)
1276 else:
1277 # This is NOT a package directory, add its files at top level
1278 if self.debug:
1279 print "Adding files from directory", pathname
1280 for filename in os.listdir(pathname):
1281 path = os.path.join(pathname, filename)
1282 root, ext = os.path.splitext(filename)
1283 if ext == ".py":
1284 fname, arcname = self._get_codename(path[0:-3],
1285 basename)
1286 if self.debug:
1287 print "Adding", arcname
1288 self.write(fname, arcname)
1289 else:
1290 if pathname[-3:] != ".py":
1291 raise RuntimeError, \
Fred Drake5db246d2000-09-29 20:44:48 +00001292 'Files added with writepy() must end with ".py"'
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001293 fname, arcname = self._get_codename(pathname[0:-3], basename)
1294 if self.debug:
1295 print "Adding file", arcname
1296 self.write(fname, arcname)
1297
1298 def _get_codename(self, pathname, basename):
1299 """Return (filename, archivename) for the path.
1300
Fred Drake484d7352000-10-02 21:14:52 +00001301 Given a module name path, return the correct file path and
1302 archive name, compiling if necessary. For example, given
1303 /python/lib/string, return (/python/lib/string.pyc, string).
1304 """
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001305 file_py = pathname + ".py"
1306 file_pyc = pathname + ".pyc"
1307 file_pyo = pathname + ".pyo"
1308 if os.path.isfile(file_pyo) and \
Raymond Hettinger32200ae2002-06-01 19:51:15 +00001309 os.stat(file_pyo).st_mtime >= os.stat(file_py).st_mtime:
Tim Peterse1190062001-01-15 03:34:38 +00001310 fname = file_pyo # Use .pyo file
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001311 elif not os.path.isfile(file_pyc) or \
Raymond Hettinger32200ae2002-06-01 19:51:15 +00001312 os.stat(file_pyc).st_mtime < os.stat(file_py).st_mtime:
Fred Drake484d7352000-10-02 21:14:52 +00001313 import py_compile
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001314 if self.debug:
1315 print "Compiling", file_py
Martin v. Löwis0c6774d2003-01-15 11:51:06 +00001316 try:
1317 py_compile.compile(file_py, file_pyc, None, True)
1318 except py_compile.PyCompileError,err:
1319 print err.msg
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001320 fname = file_pyc
1321 else:
1322 fname = file_pyc
1323 archivename = os.path.split(fname)[1]
1324 if basename:
1325 archivename = "%s/%s" % (basename, archivename)
1326 return (fname, archivename)
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001327
1328
def main(args = None):
    """Command-line interface: list, test, extract or create a ZIP file.

    args is the list of command-line arguments without the program
    name; when it is None, sys.argv[1:] is used.  The first argument
    selects the action (-l, -t, -e or -c).  On a usage error the usage
    text is printed and sys.exit(1) is called.

    -t prints the name of the first corrupted member reported by
    testzip(), if any, before printing "Done testing".  -e creates
    directories for directory entries (names ending in "/") instead of
    trying to open them as files.  Every archive and output file is
    closed even when an error occurs.
    """
    import textwrap
    USAGE=textwrap.dedent("""\
        Usage:
            zipfile.py -l zipfile.zip # Show listing of a zipfile
            zipfile.py -t zipfile.zip # Test if a zipfile is valid
            zipfile.py -e zipfile.zip target # Extract zipfile into target dir
            zipfile.py -c zipfile.zip src ... # Create zipfile from sources
        """)
    if args is None:
        args = sys.argv[1:]

    if not args or args[0] not in ('-l', '-c', '-e', '-t'):
        print(USAGE)
        sys.exit(1)

    if args[0] == '-l':
        if len(args) != 2:
            print(USAGE)
            sys.exit(1)
        zf = ZipFile(args[1], 'r')
        try:
            zf.printdir()
        finally:
            zf.close()

    elif args[0] == '-t':
        if len(args) != 2:
            print(USAGE)
            sys.exit(1)
        zf = ZipFile(args[1], 'r')
        try:
            # testzip() returns the name of the first bad member, or None.
            badfile = zf.testzip()
        finally:
            zf.close()
        if badfile:
            print("The following enclosed file is corrupted: %r" % badfile)
        print("Done testing")

    elif args[0] == '-e':
        if len(args) != 3:
            print(USAGE)
            sys.exit(1)

        zf = ZipFile(args[1], 'r')
        try:
            out = args[2]
            # NOTE(review): member names are joined onto the target as-is.
            # An archive from an untrusted source may contain ".." parts or
            # absolute paths and so write outside of the target directory.
            for path in zf.namelist():
                if path.startswith('./'):
                    tgt = os.path.join(out, path[2:])
                else:
                    tgt = os.path.join(out, path)

                if path.endswith('/'):
                    # Directory entry: there is no file data to write.
                    if not os.path.isdir(tgt):
                        os.makedirs(tgt)
                    continue

                tgtdir = os.path.dirname(tgt)
                # tgtdir is empty when the target is the current directory;
                # os.makedirs('') would raise.
                if tgtdir and not os.path.exists(tgtdir):
                    os.makedirs(tgtdir)
                fp = open(tgt, 'wb')
                try:
                    fp.write(zf.read(path))
                finally:
                    fp.close()
        finally:
            zf.close()

    elif args[0] == '-c':
        if len(args) < 3:
            print(USAGE)
            sys.exit(1)

        def addToZip(zf, path, zippath):
            # Add a file, or recursively the contents of a directory.
            if os.path.isfile(path):
                zf.write(path, zippath, ZIP_DEFLATED)
            elif os.path.isdir(path):
                for nm in os.listdir(path):
                    addToZip(zf,
                            os.path.join(path, nm), os.path.join(zippath, nm))
            # else: ignore

        zf = ZipFile(args[1], 'w', allowZip64=True)
        try:
            for src in args[2:]:
                addToZip(zf, src, os.path.basename(src))
        finally:
            zf.close()
1401
# Run the command-line interface when this file is executed as a script.
if __name__ == "__main__":
    main()