blob: bb8fc53de4dc4c0845d3498c527458da1dcbd795 [file] [log] [blame]
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001"""
2Read and write ZIP files.
3"""
Georg Brandl62416bc2008-01-07 18:47:44 +00004import struct, os, time, sys, shutil
Ronald Oussoren143cefb2006-06-15 08:14:18 +00005import binascii, cStringIO
Guido van Rossum32abe6f2000-03-31 17:30:02 +00006
7try:
Tim Peterse1190062001-01-15 03:34:38 +00008 import zlib # We may need its compression method
Gregory P. Smithb89a0962008-03-19 01:46:10 +00009 crc32 = zlib.crc32
Guido van Rossum9c673f32001-04-10 15:37:12 +000010except ImportError:
Guido van Rossum32abe6f2000-03-31 17:30:02 +000011 zlib = None
Gregory P. Smithb89a0962008-03-19 01:46:10 +000012 crc32 = binascii.crc32
Guido van Rossum32abe6f2000-03-31 17:30:02 +000013
Skip Montanaro40fc1602001-03-01 04:27:19 +000014__all__ = ["BadZipfile", "error", "ZIP_STORED", "ZIP_DEFLATED", "is_zipfile",
Ronald Oussoren143cefb2006-06-15 08:14:18 +000015 "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile" ]
Skip Montanaro40fc1602001-03-01 04:27:19 +000016
class BadZipfile(Exception):
    """Exception raised by this module; also available under the name `error`."""


class LargeZipFile(Exception):
    """
    Raised when writing a zipfile, the zipfile requires ZIP64 extensions
    and those extensions are disabled.
    """


# Alias kept so callers can catch the module's exception as `error`
error = BadZipfile
Guido van Rossum32abe6f2000-03-31 17:30:02 +000028
# Numeric limits used by this module
ZIP64_LIMIT = (1 << 31) - 1
ZIP_FILECOUNT_LIMIT = 1 << 16
ZIP_MAX_COMMENT = (1 << 16) - 1

# constants for Zip file compression methods
ZIP_STORED = 0
ZIP_DEFLATED = 8
# Other ZIP compression methods not supported

# Below are some formats and associated data for reading/writing headers using
# the struct module.  The names and structures of headers/records are those used
# in the PKWARE description of the ZIP file format:
#     http://www.pkware.com/documents/casestudies/APPNOTE.TXT
# (URL valid as of January 2008)
#
# Each group below gives the struct format, the 4-byte magic number, the
# packed size, and the positions of the individual fields in the tuple that
# struct.unpack() returns for that format.

# The "end of central directory" structure (section V.I in the format document)
structEndArchive = "<4s4H2LH"
stringEndArchive = "PK\005\006"
sizeEndCentDir = struct.calcsize(structEndArchive)

# The last two indices (_ECD_COMMENT, _ECD_LOCATION) are not part of the
# structure as defined in the spec, but they are used internally by this
# module as a convenience
(_ECD_SIGNATURE,
 _ECD_DISK_NUMBER,
 _ECD_DISK_START,
 _ECD_ENTRIES_THIS_DISK,
 _ECD_ENTRIES_TOTAL,
 _ECD_SIZE,
 _ECD_OFFSET,
 _ECD_COMMENT_SIZE,
 _ECD_COMMENT,
 _ECD_LOCATION) = range(10)

# The "central directory" structure (section V.F in the format document)
structCentralDir = "<4s4B4HL2L5H2L"
stringCentralDir = "PK\001\002"
sizeCentralDir = struct.calcsize(structCentralDir)

# indexes of entries in the central directory structure
(_CD_SIGNATURE,
 _CD_CREATE_VERSION,
 _CD_CREATE_SYSTEM,
 _CD_EXTRACT_VERSION,
 _CD_EXTRACT_SYSTEM,
 _CD_FLAG_BITS,
 _CD_COMPRESS_TYPE,
 _CD_TIME,
 _CD_DATE,
 _CD_CRC,
 _CD_COMPRESSED_SIZE,
 _CD_UNCOMPRESSED_SIZE,
 _CD_FILENAME_LENGTH,
 _CD_EXTRA_FIELD_LENGTH,
 _CD_COMMENT_LENGTH,
 _CD_DISK_NUMBER_START,
 _CD_INTERNAL_FILE_ATTRIBUTES,
 _CD_EXTERNAL_FILE_ATTRIBUTES,
 _CD_LOCAL_HEADER_OFFSET) = range(19)

# The "local file header" structure (section V.A in the format document)
structFileHeader = "<4s2B4HL2L2H"
stringFileHeader = "PK\003\004"
sizeFileHeader = struct.calcsize(structFileHeader)

# indexes of entries in the local file header structure
(_FH_SIGNATURE,
 _FH_EXTRACT_VERSION,
 _FH_EXTRACT_SYSTEM,
 _FH_GENERAL_PURPOSE_FLAG_BITS,
 _FH_COMPRESSION_METHOD,
 _FH_LAST_MOD_TIME,
 _FH_LAST_MOD_DATE,
 _FH_CRC,
 _FH_COMPRESSED_SIZE,
 _FH_UNCOMPRESSED_SIZE,
 _FH_FILENAME_LENGTH,
 _FH_EXTRA_FIELD_LENGTH) = range(12)

# The "Zip64 end of central directory locator" structure, magic number, and size
structEndArchive64Locator = "<4sLQL"
stringEndArchive64Locator = "PK\x06\x07"
sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator)

# The "Zip64 end of central directory" record
# (section V.G in the format document)
structEndArchive64 = "<4sQ2H2L4Q"
stringEndArchive64 = "PK\x06\x06"
sizeEndCentDir64 = struct.calcsize(structEndArchive64)

# indexes of entries in the Zip64 end of central directory record
(_CD64_SIGNATURE,
 _CD64_DIRECTORY_RECSIZE,
 _CD64_CREATE_VERSION,
 _CD64_EXTRACT_VERSION,
 _CD64_DISK_NUMBER,
 _CD64_DISK_NUMBER_START,
 _CD64_NUMBER_ENTRIES_THIS_DISK,
 _CD64_NUMBER_ENTRIES_TOTAL,
 _CD64_DIRECTORY_SIZE,
 _CD64_OFFSET_START_CENTDIR) = range(10)
130
def is_zipfile(filename):
    """Quickly see if file is a ZIP file by checking the magic number.

    filename is the path of the file to inspect.  Returns True when
    _EndRecData() finds an "end of central directory" record in the file,
    and False otherwise -- including when the file cannot be opened or an
    IOError occurs while it is being examined.
    """
    try:
        fpin = open(filename, "rb")
        try:
            endrec = _EndRecData(fpin)
        finally:
            # Close the handle even if reading the end record fails, so an
            # I/O error while probing does not leak the open file.
            fpin.close()
        if endrec:
            return True                 # file has correct magic number
    except IOError:
        pass
    return False
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000142
def _EndRecData64(fpin, offset, endrec):
    """
    Read the ZIP64 end-of-archive records and use that to update endrec

    fpin is the open archive file.  offset is the position of the classic
    "end of central directory" record expressed relative to the end of the
    file (it is used with whence=2).  endrec is the list produced by
    _EndRecData(); it is updated in place and returned.

    endrec is returned unchanged when the ZIP64 locator or record cannot be
    read (file too small) or does not carry the expected signature.
    Raises BadZipfile for archives that span multiple disks.
    """
    try:
        fpin.seek(offset - sizeEndCentDir64Locator, 2)
    except IOError:
        # If the seek fails, the file is not large enough to contain a ZIP64
        # end-of-archive locator, so keep the record we were given.
        return endrec

    data = fpin.read(sizeEndCentDir64Locator)
    if len(data) != sizeEndCentDir64Locator:
        # Short read: there is no complete locator in front of the record.
        return endrec
    sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, data)
    if sig != stringEndArchive64Locator:
        return endrec

    if diskno != 0 or disks != 1:
        raise BadZipfile("zipfiles that span multiple disks are not supported")

    # Assume no 'zip64 extensible data'
    try:
        fpin.seek(offset - sizeEndCentDir64Locator - sizeEndCentDir64, 2)
    except IOError:
        return endrec
    data = fpin.read(sizeEndCentDir64)
    if len(data) != sizeEndCentDir64:
        return endrec
    sig, sz, create_version, read_version, disk_num, disk_dir, \
            dircount, dircount2, dirsize, diroffset = \
            struct.unpack(structEndArchive64, data)
    if sig != stringEndArchive64:
        return endrec

    # Update the original endrec using data from the ZIP64 record.  The
    # signature is overwritten too, so the ZIP64 signature in endrec marks
    # that the ZIP64 structures were found.
    endrec[_ECD_SIGNATURE] = sig
    endrec[_ECD_DISK_NUMBER] = disk_num
    endrec[_ECD_DISK_START] = disk_dir
    endrec[_ECD_ENTRIES_THIS_DISK] = dircount
    endrec[_ECD_ENTRIES_TOTAL] = dircount2
    endrec[_ECD_SIZE] = dirsize
    endrec[_ECD_OFFSET] = diroffset
    return endrec
174
175
def _EndRecData(fpin):
    """Return data from the "End of Central Directory" record, or None.

    The data is a list of the nine items in the ZIP "End of central dir"
    record followed by a tenth item, the file seek offset of this record.

    fpin is an open, seekable file object.  None is returned when no valid
    record can be found, including when the file is too short to hold one.
    When the record's central directory offset is 0xffffffff the ZIP64
    records are consulted through _EndRecData64().
    """

    # Determine file size
    fpin.seek(0, 2)
    filesize = fpin.tell()

    # Check to see if this is ZIP file with no archive comment (the
    # "end of central directory" structure should be the last item in the
    # file if this is the case).
    try:
        fpin.seek(-sizeEndCentDir, 2)
    except IOError:
        # The file is shorter than an end-of-central-directory record, so
        # it cannot be a ZIP file.
        return None
    data = fpin.read()
    if (len(data) == sizeEndCentDir and
            data[0:4] == stringEndArchive and
            data[-2:] == "\000\000"):
        # the signature is correct and there's no comment, unpack structure
        endrec = list(struct.unpack(structEndArchive, data))

        # Append a blank comment and record start offset
        endrec.append("")
        endrec.append(filesize - sizeEndCentDir)
        if endrec[_ECD_OFFSET] == 0xffffffff:
            # the value for the "offset of the start of the central directory"
            # indicates that there is a "Zip64 end of central directory"
            # structure present, so go look for it
            return _EndRecData64(fpin, -sizeEndCentDir, endrec)

        return endrec

    # Either this is not a ZIP file, or it is a ZIP file with an archive
    # comment.  Search the end of the file for the "end of central directory"
    # record signature. The comment is the last item in the ZIP file and may be
    # up to 64K long.  It is assumed that the "end of central directory" magic
    # number does not appear in the comment.
    maxCommentStart = max(filesize - (1 << 16) - sizeEndCentDir, 0)
    fpin.seek(maxCommentStart, 0)
    data = fpin.read()
    start = data.rfind(stringEndArchive)
    if start >= 0:
        # found the magic number; attempt to unpack and interpret
        recData = data[start:start+sizeEndCentDir]
        if len(recData) != sizeEndCentDir:
            # The signature sits too close to the end of the file for a
            # complete record to follow it.
            return None
        endrec = list(struct.unpack(structEndArchive, recData))
        comment = data[start+sizeEndCentDir:]
        # check that comment length is correct
        if endrec[_ECD_COMMENT_SIZE] == len(comment):
            # Append the archive comment and start offset
            endrec.append(comment)
            endrec.append(maxCommentStart + start)
            if endrec[_ECD_OFFSET] == 0xffffffff:
                # There is apparently a "Zip64 end of central directory"
                # structure present, so go look for it.  `start` is relative
                # to maxCommentStart, so the absolute position of the record
                # must be used to derive the offset from the end of the file.
                return _EndRecData64(fpin, maxCommentStart + start - filesize,
                                     endrec)
            return endrec

    # Unable to find a valid end of central directory structure
    return None
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000234
Fred Drake484d7352000-10-02 21:14:52 +0000235
class ZipInfo (object):
    """Class with attributes describing each file in the ZIP archive.

    Instances are created with a file name and a (year, month, day, hour,
    min, sec) tuple.  The attributes header_offset, CRC, compress_size and
    file_size are not set by the constructor; they are filled in elsewhere
    (by class ZipFile, according to the note in __init__).
    """

    __slots__ = (
            'orig_filename',
            'filename',
            'date_time',
            'compress_type',
            'comment',
            'extra',
            'create_system',
            'create_version',
            'extract_version',
            'reserved',
            'flag_bits',
            'volume',
            'internal_attr',
            'external_attr',
            'header_offset',
            'CRC',
            'compress_size',
            'file_size',
            '_raw_time',
        )

    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
        """Store the (normalized) file name and timestamp, plus defaults."""
        self.orig_filename = filename   # Original file name in archive

        # Terminate the file name at the first null byte.  Null bytes in file
        # names are used as tricks by viruses in archives.
        null_byte = filename.find(chr(0))
        if null_byte >= 0:
            filename = filename[0:null_byte]
        # This is used to ensure paths in generated ZIP files always use
        # forward slashes as the directory separator, as required by the
        # ZIP format specification.
        if os.sep != "/" and os.sep in filename:
            filename = filename.replace(os.sep, "/")

        self.filename = filename        # Normalized file name
        self.date_time = date_time      # year, month, day, hour, min, sec
        # Standard values:
        self.compress_type = ZIP_STORED # Type of compression for the file
        self.comment = ""               # Comment for each file
        self.extra = ""                 # ZIP extra data
        if sys.platform == 'win32':
            self.create_system = 0          # System which created ZIP archive
        else:
            # Assume everything else is unix-y
            self.create_system = 3          # System which created ZIP archive
        self.create_version = 20        # Version which created ZIP archive
        self.extract_version = 20       # Version needed to extract archive
        self.reserved = 0               # Must be zero
        self.flag_bits = 0              # ZIP flag bits
        self.volume = 0                 # Volume number of file header
        self.internal_attr = 0          # Internal attributes
        self.external_attr = 0          # External file attributes
        # Other attributes are set by class ZipFile:
        # header_offset         Byte offset to the file header
        # CRC                   CRC-32 of the uncompressed file
        # compress_size         Size of the compressed file
        # file_size             Size of the uncompressed file

    def FileHeader(self):
        """Return the per-file header as a string.

        The result is the packed local file header followed by the encoded
        file name and the extra field.  When a size exceeds ZIP64_LIMIT a
        ZIP64 extra record is appended, the header sizes are set to
        0xffffffff, and extract_version/create_version are raised to at
        least 45 on this instance.
        """
        dt = self.date_time
        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
        if self.flag_bits & 0x08:
            # Set these to zero because we write them after the file data
            CRC = compress_size = file_size = 0
        else:
            CRC = self.CRC
            compress_size = self.compress_size
            file_size = self.file_size

        extra = self.extra

        if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
            # File is larger than what fits into a 4 byte integer,
            # fall back to the ZIP64 extension
            fmt = '<HHQQ'
            extra = extra + struct.pack(fmt,
                    1, struct.calcsize(fmt)-4, file_size, compress_size)
            file_size = 0xffffffff
            compress_size = 0xffffffff
            self.extract_version = max(45, self.extract_version)
            # extract_version is already >= 45 at this point, so this also
            # lifts create_version to at least 45.
            self.create_version = max(45, self.extract_version)

        filename, flag_bits = self._encodeFilenameFlags()
        header = struct.pack(structFileHeader, stringFileHeader,
                 self.extract_version, self.reserved, flag_bits,
                 self.compress_type, dostime, dosdate, CRC,
                 compress_size, file_size,
                 len(filename), len(extra))
        return header + filename + extra

    def _encodeFilenameFlags(self):
        """Return (encoded file name, flag bits) for writing a header.

        A unicode name is encoded as ASCII when possible; otherwise it is
        encoded as UTF-8 and bit 0x800 is set in the returned flags.  A
        non-unicode name is returned unchanged with the current flags.
        """
        if isinstance(self.filename, unicode):
            try:
                return self.filename.encode('ascii'), self.flag_bits
            except UnicodeEncodeError:
                return self.filename.encode('utf-8'), self.flag_bits | 0x800
        else:
            return self.filename, self.flag_bits

    def _decodeFilename(self):
        """Return the file name, decoded from UTF-8 if flag bit 0x800 is set."""
        if self.flag_bits & 0x800:
            return self.filename.decode('utf-8')
        else:
            return self.filename

    def _decodeExtra(self):
        """Decode the extra field and apply any ZIP64 record found in it.

        Each record is a 4-byte header (type, length) followed by `length`
        bytes of data.  For records of type 1 the 64-bit values replace
        file_size, compress_size and header_offset, in that order, for
        those attributes currently holding the 0xffffffff placeholder.
        Raises RuntimeError when a type 1 record has an unexpected length.
        """
        extra = self.extra
        unpack = struct.unpack
        # Stop once fewer than 4 bytes remain: a record header cannot be
        # read from them, so trailing padding is ignored instead of making
        # struct.unpack() fail.
        while len(extra) >= 4:
            tp, ln = unpack('<HH', extra[:4])
            if tp == 1:
                if ln >= 24:
                    counts = unpack('<QQQ', extra[4:28])
                elif ln == 16:
                    counts = unpack('<QQ', extra[4:20])
                elif ln == 8:
                    counts = unpack('<Q', extra[4:12])
                elif ln == 0:
                    counts = ()
                else:
                    raise RuntimeError("Corrupt extra field %s"%(ln,))

                idx = 0

                # ZIP64 extension (large files and/or large archives)
                if self.file_size in (0xffffffffffffffff, 0xffffffff):
                    self.file_size = counts[idx]
                    idx += 1

                if self.compress_size == 0xFFFFFFFF:
                    self.compress_size = counts[idx]
                    idx += 1

                if self.header_offset == 0xffffffff:
                    self.header_offset = counts[idx]
                    idx += 1

            extra = extra[ln+4:]
Tim Petersa608bb22006-06-15 18:06:29 +0000383
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000384
class _ZipDecrypter:
    """Class to handle decryption of files stored within a ZIP archive.

    ZIP supports a password-based form of encryption. Even though known
    plaintext attacks have been found against it, it is still useful
    to be able to get data out of such a file.

    An instance keeps three 32-bit keys that are seeded from the password
    and updated with every decrypted character, so characters must be fed
    in order.

    Usage:
        zd = _ZipDecrypter(mypwd)
        plain_char = zd(cypher_char)
        plain_text = map(zd, cypher_text)
    """

    def _GenerateCRCTable():
        """Generate a CRC-32 table.

        ZIP encryption uses the CRC32 one-byte primitive for scrambling some
        internal keys. We noticed that a direct implementation is faster than
        relying on binascii.crc32().
        """
        poly = 0xedb88320
        table = []
        for byte in range(256):
            value = byte
            for _ in range(8):
                low_bit = value & 1
                value = (value >> 1) & 0x7FFFFFFF
                if low_bit:
                    value ^= poly
            table.append(value)
        return table
    # Built once, at class creation time, and shared by all instances
    crctable = _GenerateCRCTable()

    def _crc32(self, ch, crc):
        """Compute the CRC32 primitive on one byte."""
        slot = (crc ^ ord(ch)) & 0xff
        return ((crc >> 8) & 0xffffff) ^ self.crctable[slot]

    def __init__(self, pwd):
        """Seed the three keys and mix in every character of pwd."""
        self.key0 = 0x12345678
        self.key1 = 0x23456789
        self.key2 = 0x34567890
        for p in pwd:
            self._UpdateKeys(p)

    def _UpdateKeys(self, c):
        """Advance the key state with the one-character string c."""
        self.key0 = self._crc32(c, self.key0)
        mixed = (self.key1 + (self.key0 & 0xff)) & 0xffffffff
        self.key1 = (mixed * 0x08088405 + 1) & 0xffffffff
        top_byte = chr((self.key1 >> 24) & 0xff)
        self.key2 = self._crc32(top_byte, self.key2)

    def __call__(self, c):
        """Decrypt a single character."""
        k = self.key2 | 2
        mask = ((k * (k ^ 1)) >> 8) & 0xff
        plain = chr(ord(c) ^ mask)
        # The keys are updated with the decrypted character
        self._UpdateKeys(plain)
        return plain
443
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000444class ZipExtFile:
445 """File-like object for reading an archive member.
Tim Petersea5962f2007-03-12 18:07:52 +0000446 Is returned by ZipFile.open().
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000447 """
Tim Petersea5962f2007-03-12 18:07:52 +0000448
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000449 def __init__(self, fileobj, zipinfo, decrypt=None):
450 self.fileobj = fileobj
451 self.decrypter = decrypt
452 self.bytes_read = 0L
453 self.rawbuffer = ''
454 self.readbuffer = ''
455 self.linebuffer = ''
456 self.eof = False
457 self.univ_newlines = False
458 self.nlSeps = ("\n", )
459 self.lastdiscard = ''
460
461 self.compress_type = zipinfo.compress_type
462 self.compress_size = zipinfo.compress_size
Tim Petersea5962f2007-03-12 18:07:52 +0000463
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000464 self.closed = False
465 self.mode = "r"
466 self.name = zipinfo.filename
467
468 # read from compressed files in 64k blocks
469 self.compreadsize = 64*1024
470 if self.compress_type == ZIP_DEFLATED:
471 self.dc = zlib.decompressobj(-15)
472
473 def set_univ_newlines(self, univ_newlines):
474 self.univ_newlines = univ_newlines
Tim Petersea5962f2007-03-12 18:07:52 +0000475
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000476 # pick line separator char(s) based on universal newlines flag
477 self.nlSeps = ("\n", )
478 if self.univ_newlines:
479 self.nlSeps = ("\r\n", "\r", "\n")
480
481 def __iter__(self):
482 return self
Tim Petersea5962f2007-03-12 18:07:52 +0000483
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000484 def next(self):
485 nextline = self.readline()
486 if not nextline:
487 raise StopIteration()
488
489 return nextline
490
491 def close(self):
492 self.closed = True
493
494 def _checkfornewline(self):
495 nl, nllen = -1, -1
496 if self.linebuffer:
497 # ugly check for cases where half of an \r\n pair was
498 # read on the last pass, and the \r was discarded. In this
499 # case we just throw away the \n at the start of the buffer.
500 if (self.lastdiscard, self.linebuffer[0]) == ('\r','\n'):
501 self.linebuffer = self.linebuffer[1:]
502
Tim Petersea5962f2007-03-12 18:07:52 +0000503 for sep in self.nlSeps:
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000504 nl = self.linebuffer.find(sep)
505 if nl >= 0:
506 nllen = len(sep)
507 return nl, nllen
508
509 return nl, nllen
Tim Petersea5962f2007-03-12 18:07:52 +0000510
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000511 def readline(self, size = -1):
512 """Read a line with approx. size. If size is negative,
Tim Petersea5962f2007-03-12 18:07:52 +0000513 read a whole line.
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000514 """
515 if size < 0:
516 size = sys.maxint
517 elif size == 0:
518 return ''
519
520 # check for a newline already in buffer
521 nl, nllen = self._checkfornewline()
Tim Petersea5962f2007-03-12 18:07:52 +0000522
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000523 if nl >= 0:
524 # the next line was already in the buffer
525 nl = min(nl, size)
526 else:
527 # no line break in buffer - try to read more
528 size -= len(self.linebuffer)
529 while nl < 0 and size > 0:
530 buf = self.read(min(size, 100))
531 if not buf:
532 break
533 self.linebuffer += buf
534 size -= len(buf)
535
536 # check for a newline in buffer
537 nl, nllen = self._checkfornewline()
Tim Petersea5962f2007-03-12 18:07:52 +0000538
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000539 # we either ran out of bytes in the file, or
540 # met the specified size limit without finding a newline,
541 # so return current buffer
542 if nl < 0:
543 s = self.linebuffer
544 self.linebuffer = ''
545 return s
546
547 buf = self.linebuffer[:nl]
548 self.lastdiscard = self.linebuffer[nl:nl + nllen]
549 self.linebuffer = self.linebuffer[nl + nllen:]
550
551 # line is always returned with \n as newline char (except possibly
552 # for a final incomplete line in the file, which is handled above).
553 return buf + "\n"
554
555 def readlines(self, sizehint = -1):
556 """Return a list with all (following) lines. The sizehint parameter
557 is ignored in this implementation.
558 """
559 result = []
560 while True:
561 line = self.readline()
562 if not line: break
563 result.append(line)
564 return result
565
566 def read(self, size = None):
567 # act like file() obj and return empty string if size is 0
568 if size == 0:
569 return ''
570
571 # determine read size
572 bytesToRead = self.compress_size - self.bytes_read
573
574 # adjust read size for encrypted files since the first 12 bytes
575 # are for the encryption/password information
576 if self.decrypter is not None:
577 bytesToRead -= 12
578
579 if size is not None and size >= 0:
580 if self.compress_type == ZIP_STORED:
581 lr = len(self.readbuffer)
582 bytesToRead = min(bytesToRead, size - lr)
583 elif self.compress_type == ZIP_DEFLATED:
584 if len(self.readbuffer) > size:
585 # the user has requested fewer bytes than we've already
586 # pulled through the decompressor; don't read any more
587 bytesToRead = 0
588 else:
589 # user will use up the buffer, so read some more
590 lr = len(self.rawbuffer)
591 bytesToRead = min(bytesToRead, self.compreadsize - lr)
592
593 # avoid reading past end of file contents
594 if bytesToRead + self.bytes_read > self.compress_size:
595 bytesToRead = self.compress_size - self.bytes_read
596
597 # try to read from file (if necessary)
598 if bytesToRead > 0:
599 bytes = self.fileobj.read(bytesToRead)
600 self.bytes_read += len(bytes)
601 self.rawbuffer += bytes
602
603 # handle contents of raw buffer
604 if self.rawbuffer:
605 newdata = self.rawbuffer
606 self.rawbuffer = ''
607
608 # decrypt new data if we were given an object to handle that
609 if newdata and self.decrypter is not None:
610 newdata = ''.join(map(self.decrypter, newdata))
611
612 # decompress newly read data if necessary
613 if newdata and self.compress_type == ZIP_DEFLATED:
614 newdata = self.dc.decompress(newdata)
615 self.rawbuffer = self.dc.unconsumed_tail
616 if self.eof and len(self.rawbuffer) == 0:
Tim Petersea5962f2007-03-12 18:07:52 +0000617 # we're out of raw bytes (both from the file and
618 # the local buffer); flush just to make sure the
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000619 # decompressor is done
620 newdata += self.dc.flush()
621 # prevent decompressor from being used again
622 self.dc = None
623
624 self.readbuffer += newdata
625
626
627 # return what the user asked for
628 if size is None or len(self.readbuffer) <= size:
629 bytes = self.readbuffer
630 self.readbuffer = ''
631 else:
632 bytes = self.readbuffer[:size]
633 self.readbuffer = self.readbuffer[size:]
634
635 return bytes
Tim Petersea5962f2007-03-12 18:07:52 +0000636
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000637
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000638class ZipFile:
Tim Petersa19a1682001-03-29 04:36:09 +0000639 """ Class with methods to open, read, write, close, list zip files.
640
Martin v. Löwis8c436412008-07-03 12:51:14 +0000641 z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=False)
Tim Petersa19a1682001-03-29 04:36:09 +0000642
Fred Drake3d9091e2001-03-26 15:49:24 +0000643 file: Either the path to the file, or a file-like object.
644 If it is a path, the file will be opened and closed by ZipFile.
645 mode: The mode can be either read "r", write "w" or append "a".
646 compression: ZIP_STORED (no compression) or ZIP_DEFLATED (requires zlib).
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000647 allowZip64: if True ZipFile will create files with ZIP64 extensions when
648 needed, otherwise it will raise an exception when this would
649 be necessary.
650
Fred Drake3d9091e2001-03-26 15:49:24 +0000651 """
Fred Drake484d7352000-10-02 21:14:52 +0000652
Fred Drake90eac282001-02-28 05:29:34 +0000653 fp = None # Set here since __del__ checks it
654
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000655 def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=False):
Fred Drake484d7352000-10-02 21:14:52 +0000656 """Open the ZIP file with mode read "r", write "w" or append "a"."""
Georg Brandl4b3ab6f2007-07-12 09:59:22 +0000657 if mode not in ("r", "w", "a"):
658 raise RuntimeError('ZipFile() requires mode "r", "w", or "a"')
659
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000660 if compression == ZIP_STORED:
661 pass
662 elif compression == ZIP_DEFLATED:
663 if not zlib:
664 raise RuntimeError,\
Fred Drake5db246d2000-09-29 20:44:48 +0000665 "Compression requires the (missing) zlib module"
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000666 else:
667 raise RuntimeError, "That compression method is not supported"
Georg Brandl4b3ab6f2007-07-12 09:59:22 +0000668
669 self._allowZip64 = allowZip64
670 self._didModify = False
Tim Peterse1190062001-01-15 03:34:38 +0000671 self.debug = 0 # Level of printing: 0 through 3
672 self.NameToInfo = {} # Find file info given name
673 self.filelist = [] # List of ZipInfo instances for archive
674 self.compression = compression # Method of compression
Raymond Hettinger2ca7c192005-02-16 09:27:49 +0000675 self.mode = key = mode.replace('b', '')[0]
Martin v. Löwisc6d626e2007-02-13 09:49:38 +0000676 self.pwd = None
Martin v. Löwis8c436412008-07-03 12:51:14 +0000677 self.comment = ''
Tim Petersa19a1682001-03-29 04:36:09 +0000678
Fred Drake3d9091e2001-03-26 15:49:24 +0000679 # Check if we were passed a file-like object
Walter Dörwald65230a22002-06-03 15:58:32 +0000680 if isinstance(file, basestring):
Fred Drake3d9091e2001-03-26 15:49:24 +0000681 self._filePassed = 0
682 self.filename = file
683 modeDict = {'r' : 'rb', 'w': 'wb', 'a' : 'r+b'}
Martin v. Löwis84f6de92007-02-13 10:10:39 +0000684 try:
685 self.fp = open(file, modeDict[mode])
686 except IOError:
687 if mode == 'a':
688 mode = key = 'w'
689 self.fp = open(file, modeDict[mode])
690 else:
691 raise
Fred Drake3d9091e2001-03-26 15:49:24 +0000692 else:
693 self._filePassed = 1
694 self.fp = file
695 self.filename = getattr(file, 'name', None)
Tim Petersa19a1682001-03-29 04:36:09 +0000696
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000697 if key == 'r':
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000698 self._GetContents()
699 elif key == 'w':
Fred Drake3d9091e2001-03-26 15:49:24 +0000700 pass
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000701 elif key == 'a':
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000702 try: # See if file is a zip file
703 self._RealGetContents()
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000704 # seek to start of directory and overwrite
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000705 self.fp.seek(self.start_dir, 0)
706 except BadZipfile: # file is not a zip file, just append
707 self.fp.seek(0, 2)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000708 else:
Tim Peters7d3bad62001-04-04 18:56:49 +0000709 if not self._filePassed:
710 self.fp.close()
711 self.fp = None
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000712 raise RuntimeError, 'Mode must be "r", "w" or "a"'
713
714 def _GetContents(self):
Tim Peters7d3bad62001-04-04 18:56:49 +0000715 """Read the directory, making sure we close the file if the format
716 is bad."""
717 try:
718 self._RealGetContents()
719 except BadZipfile:
720 if not self._filePassed:
721 self.fp.close()
722 self.fp = None
723 raise
724
def _RealGetContents(self):
    """Read in the table of contents for the ZIP file.

    Locates the end-of-central-directory record with _EndRecData(), reads
    the whole central directory into memory and builds one ZipInfo per
    entry, appending it to self.filelist and indexing it by name in
    self.NameToInfo.  Also sets self.comment and self.start_dir.

    Raises BadZipfile when no end record is found, when an entry does not
    start with the central directory signature, or when an entry's
    fixed-size record is truncated.
    """
    fp = self.fp
    endrec = _EndRecData(fp)
    if not endrec:
        raise BadZipfile("File is not a zip file")
    if self.debug > 1:
        print(endrec)
    size_cd = endrec[_ECD_SIZE]             # bytes in central directory
    offset_cd = endrec[_ECD_OFFSET]         # offset of central directory
    self.comment = endrec[_ECD_COMMENT]     # archive comment

    # "concat" is zero, unless zip was concatenated to another file
    concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
    if endrec[_ECD_SIGNATURE] == stringEndArchive64:
        # If Zip64 extension structures are present, account for them
        concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)

    if self.debug > 2:
        inferred = concat + offset_cd
        print("given, inferred, offset %s %s %s"
              % (offset_cd, inferred, concat))
    # self.start_dir:  Position of start of central directory
    self.start_dir = offset_cd + concat
    fp.seek(self.start_dir, 0)
    data = fp.read(size_cd)
    # Parse from an in-memory copy; from here on "fp" is that buffer.
    fp = cStringIO.StringIO(data)
    total = 0
    while total < size_cd:
        centdir = fp.read(sizeCentralDir)
        if centdir[0:4] != stringCentralDir:
            raise BadZipfile("Bad magic number for central directory")
        if len(centdir) != sizeCentralDir:
            # The signature is present but the fixed-size record is cut
            # short; struct.unpack would raise struct.error on it, so
            # report it as a malformed archive instead.
            raise BadZipfile("Truncated central directory")
        centdir = struct.unpack(structCentralDir, centdir)
        if self.debug > 2:
            print(centdir)
        filename = fp.read(centdir[_CD_FILENAME_LENGTH])
        # Create ZipInfo instance to store file information
        x = ZipInfo(filename)
        x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
        x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
        x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
        (x.create_version, x.create_system, x.extract_version, x.reserved,
         x.flag_bits, x.compress_type, t, d,
         x.CRC, x.compress_size, x.file_size) = centdir[1:12]
        x.volume, x.internal_attr, x.external_attr = centdir[15:18]
        # Convert date/time code to (year, month, day, hour, min, sec)
        x._raw_time = t
        x.date_time = ((d >> 9) + 1980, (d >> 5) & 0xF, d & 0x1F,
                       t >> 11, (t >> 5) & 0x3F, (t & 0x1F) * 2)

        x._decodeExtra()
        # Stored offsets are relative to the start of the zip data; shift
        # them by the size of whatever precedes it in the file.
        x.header_offset = x.header_offset + concat
        x.filename = x._decodeFilename()
        self.filelist.append(x)
        self.NameToInfo[x.filename] = x

        # update total bytes read from central directory
        total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH]
                 + centdir[_CD_EXTRA_FIELD_LENGTH]
                 + centdir[_CD_COMMENT_LENGTH])

        if self.debug > 2:
            print("total %s" % (total,))
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000787
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000788
def namelist(self):
    """Return a new list with the name of every archive member.

    Names appear in the same order as the entries of self.filelist.
    """
    return [entry.filename for entry in self.filelist]
795
def infolist(self):
    """Return the list of ZipInfo instances for the archive members.

    The returned object is self.filelist itself, not a copy.
    """
    members = self.filelist
    return members
800
def printdir(self):
    """Print a table of contents for the zip file.

    Writes a header row followed by one row per archive member giving its
    name, modification timestamp and uncompressed size.
    """
    print("%-46s %19s %12s" % ("File Name", "Modified ", "Size"))
    for entry in self.filelist:
        stamp = "%d-%02d-%02d %02d:%02d:%02d" % entry.date_time[:6]
        print("%-46s %s %12d" % (entry.filename, stamp, entry.file_size))
807
808 def testzip(self):
Fred Drake484d7352000-10-02 21:14:52 +0000809 """Read all the files and check the CRC."""
Antoine Pitrouc5342702008-08-17 13:06:29 +0000810 chunk_size = 2 ** 20
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000811 for zinfo in self.filelist:
812 try:
Antoine Pitrouc5342702008-08-17 13:06:29 +0000813 # Read by chunks, to avoid an OverflowError or a
814 # MemoryError with very large embedded files.
815 f = self.open(zinfo.filename, "r")
816 while f.read(chunk_size): # Check CRC-32
817 pass
Raymond Hettingerc0fac962003-06-27 22:25:03 +0000818 except BadZipfile:
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000819 return zinfo.filename
820
def getinfo(self, name):
    """Look up and return the ZipInfo instance registered under 'name'.

    Raises KeyError when self.NameToInfo has no entry for 'name'.
    """
    found = self.NameToInfo.get(name)
    if found is not None:
        return found
    raise KeyError('There is no item named %r in the archive' % name)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000829
def setpassword(self, pwd):
    """Remember 'pwd' as the default password for encrypted members.

    The value is stored on self.pwd.
    """
    self.pwd = pwd
833
def read(self, name, pwd=None):
    """Return the complete contents of member 'name' (as a string).

    The member is opened in mode "r" via self.open(), passing 'pwd'
    through, and read in one call.
    """
    member = self.open(name, "r", pwd)
    return member.read()
837
def open(self, name, mode="r", pwd=None):
    """Return file-like object for 'name'.

    'name' is a member name or a ZipInfo instance.  'mode' must be "r",
    "U" or "rU"; a mode containing "U" switches on universal newlines on
    the returned ZipExtFile.  'pwd' is the password for an encrypted
    member; when it is empty the one stored by setpassword() is used.

    Raises RuntimeError for a bad mode, an already closed archive, a
    missing password or a wrong password; KeyError (from getinfo) for an
    unknown member name; BadZipfile when the local file header has the
    wrong signature or its file name disagrees with the central directory.
    """
    if mode not in ("r", "U", "rU"):
        raise RuntimeError('open() requires mode "r", "U", or "rU"')
    if not self.fp:
        raise RuntimeError(
            "Attempt to read ZIP archive that was already closed")

    # Make sure we have an info object *before* opening any file, so an
    # unknown member name cannot leak a file handle.
    if isinstance(name, ZipInfo):
        # 'name' is already an info object
        zinfo = name
    else:
        # Get info object for name
        zinfo = self.getinfo(name)

    # Only open a new file for instances where we were not
    # given a file object in the constructor
    if self._filePassed:
        zef_file = self.fp
        opened_here = False
    else:
        zef_file = open(self.filename, 'rb')
        opened_here = True

    try:
        zef_file.seek(zinfo.header_offset, 0)

        # Skip the file header:
        fheader = zef_file.read(sizeFileHeader)
        if fheader[0:4] != stringFileHeader:
            raise BadZipfile("Bad magic number for file header")

        fheader = struct.unpack(structFileHeader, fheader)
        fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
        if fheader[_FH_EXTRA_FIELD_LENGTH]:
            zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])

        if fname != zinfo.orig_filename:
            raise BadZipfile(
                'File name in directory "%s" and header "%s" differ.' % (
                    zinfo.orig_filename, fname))

        # check for encrypted flag & handle password
        is_encrypted = zinfo.flag_bits & 0x1
        zd = None
        if is_encrypted:
            if not pwd:
                pwd = self.pwd
            if not pwd:
                raise RuntimeError("File %s is encrypted, "
                                   "password required for extraction" % name)

            zd = _ZipDecrypter(pwd)
            # The first 12 bytes in the cypher stream is an encryption
            # header used to strengthen the algorithm. The first 11 bytes
            # are completely random, while the 12th contains the MSB of
            # the CRC, or the MSB of the file time depending on the header
            # type and is used to check the correctness of the password.
            enc_header = zef_file.read(12)
            h = [zd(c) for c in enc_header[0:12]]
            if zinfo.flag_bits & 0x8:
                # compare against the file time from extended local headers
                check_byte = (zinfo._raw_time >> 8) & 0xff
            else:
                # compare against the CRC otherwise
                check_byte = (zinfo.CRC >> 24) & 0xff
            if ord(h[11]) != check_byte:
                raise RuntimeError("Bad password for file", name)

        # build and return a ZipExtFile
        if zd is None:
            zef = ZipExtFile(zef_file, zinfo)
        else:
            zef = ZipExtFile(zef_file, zinfo, zd)

        # set universal newlines on ZipExtFile if necessary
        if "U" in mode:
            zef.set_univ_newlines(True)
        return zef
    except:
        # Cleanup only, the exception is always re-raised: do not leak the
        # handle we opened ourselves.  A caller-supplied file stays open.
        if opened_here:
            zef_file.close()
        raise
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000915
def extract(self, member, path=None, pwd=None):
    """Extract one member and return what _extract_member() returns.

    `member' may be a filename or a ZipInfo object; a filename is resolved
    with getinfo().  The member is written under `path', which defaults to
    the current working directory, using its full name.  `pwd' is passed
    on for encrypted members.
    """
    if isinstance(member, ZipInfo):
        zinfo = member
    else:
        zinfo = self.getinfo(member)

    if path is None:
        destination = os.getcwd()
    else:
        destination = path

    return self._extract_member(zinfo, destination, pwd)
929
def extractall(self, path=None, members=None, pwd=None):
    """Extract several members by calling extract() on each in turn.

    `members' defaults to everything returned by namelist(); when given it
    must be a subset of that list.  `path' and `pwd' are handed to
    extract() unchanged, so `path' None means the current working
    directory.
    """
    if members is None:
        targets = self.namelist()
    else:
        targets = members

    for item in targets:
        self.extract(item, path, pwd)
941
942 def _extract_member(self, member, targetpath, pwd):
943 """Extract the ZipInfo object 'member' to a physical
944 file on the path targetpath.
945 """
946 # build the destination pathname, replacing
947 # forward slashes to platform specific separators.
948 if targetpath[-1:] == "/":
949 targetpath = targetpath[:-1]
950
951 # don't include leading "/" from file name if present
952 if os.path.isabs(member.filename):
953 targetpath = os.path.join(targetpath, member.filename[1:])
954 else:
955 targetpath = os.path.join(targetpath, member.filename)
956
957 targetpath = os.path.normpath(targetpath)
958
959 # Create all upper directories if necessary.
960 upperdirs = os.path.dirname(targetpath)
961 if upperdirs and not os.path.exists(upperdirs):
962 os.makedirs(upperdirs)
963
Georg Brandl112aa502008-05-20 08:25:48 +0000964 source = self.open(member, pwd=pwd)
Georg Brandl62416bc2008-01-07 18:47:44 +0000965 target = file(targetpath, "wb")
966 shutil.copyfileobj(source, target)
967 source.close()
968 target.close()
969
970 return targetpath
971
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000972 def _writecheck(self, zinfo):
Fred Drake484d7352000-10-02 21:14:52 +0000973 """Check for errors before writing a file to the archive."""
Raymond Hettinger54f02222002-06-01 14:18:47 +0000974 if zinfo.filename in self.NameToInfo:
Tim Peterse1190062001-01-15 03:34:38 +0000975 if self.debug: # Warning for duplicate names
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000976 print "Duplicate name:", zinfo.filename
977 if self.mode not in ("w", "a"):
978 raise RuntimeError, 'write() requires mode "w" or "a"'
979 if not self.fp:
980 raise RuntimeError, \
Fred Drake5db246d2000-09-29 20:44:48 +0000981 "Attempt to write ZIP archive that was already closed"
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000982 if zinfo.compress_type == ZIP_DEFLATED and not zlib:
983 raise RuntimeError, \
Fred Drake5db246d2000-09-29 20:44:48 +0000984 "Compression requires the (missing) zlib module"
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000985 if zinfo.compress_type not in (ZIP_STORED, ZIP_DEFLATED):
986 raise RuntimeError, \
Fred Drake5db246d2000-09-29 20:44:48 +0000987 "That compression method is not supported"
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000988 if zinfo.file_size > ZIP64_LIMIT:
989 if not self._allowZip64:
990 raise LargeZipFile("Filesize would require ZIP64 extensions")
991 if zinfo.header_offset > ZIP64_LIMIT:
992 if not self._allowZip64:
993 raise LargeZipFile("Zipfile size would require ZIP64 extensions")
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000994
def write(self, filename, arcname=None, compress_type=None):
    """Put the bytes from filename into the archive under the name
    arcname.

    'filename' is the path of the file on disk.  'arcname' defaults to
    'filename'; its drive prefix and leading path separators are removed.
    'compress_type' overrides self.compression when it is not None.

    The local header is first written with zero CRC and sizes, the data is
    streamed in 8 KiB chunks, and the header is then patched in place, so
    the archive file must be seekable.

    Raises RuntimeError when the archive is already closed, plus whatever
    _writecheck() raises for an invalid member or archive state.
    """
    if not self.fp:
        raise RuntimeError(
            "Attempt to write to ZIP archive that was already closed")

    st = os.stat(filename)
    mtime = time.localtime(st.st_mtime)
    date_time = mtime[0:6]
    # Create ZipInfo instance to store file information
    if arcname is None:
        arcname = filename
    arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
    while arcname[0] in (os.sep, os.altsep):
        arcname = arcname[1:]
    zinfo = ZipInfo(arcname, date_time)
    zinfo.external_attr = (st[0] & 0xFFFF) << 16      # Unix attributes
    if compress_type is None:
        zinfo.compress_type = self.compression
    else:
        zinfo.compress_type = compress_type

    zinfo.file_size = st.st_size
    zinfo.flag_bits = 0x00
    zinfo.header_offset = self.fp.tell()    # Start of header bytes

    self._writecheck(zinfo)
    self._didModify = True
    fp = open(filename, "rb")
    try:
        # Must overwrite CRC and sizes with correct data later
        zinfo.CRC = CRC = 0
        zinfo.compress_size = compress_size = 0
        zinfo.file_size = file_size = 0
        self.fp.write(zinfo.FileHeader())
        if zinfo.compress_type == ZIP_DEFLATED:
            cmpr = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                                    zlib.DEFLATED, -15)
        else:
            cmpr = None
        while True:
            buf = fp.read(1024 * 8)
            if not buf:
                break
            file_size = file_size + len(buf)
            CRC = crc32(buf, CRC) & 0xffffffff
            if cmpr:
                buf = cmpr.compress(buf)
                compress_size = compress_size + len(buf)
            self.fp.write(buf)
    finally:
        # Close the source file even when reading, compressing or
        # writing fails part-way through.
        fp.close()
    if cmpr:
        buf = cmpr.flush()
        compress_size = compress_size + len(buf)
        self.fp.write(buf)
        zinfo.compress_size = compress_size
    else:
        zinfo.compress_size = file_size
    zinfo.CRC = CRC
    zinfo.file_size = file_size
    # Seek backwards and write CRC and file sizes
    position = self.fp.tell()       # Preserve current position in file
    self.fp.seek(zinfo.header_offset + 14, 0)
    self.fp.write(struct.pack("<LLL", zinfo.CRC, zinfo.compress_size,
                              zinfo.file_size))
    self.fp.seek(position, 0)
    self.filelist.append(zinfo)
    self.NameToInfo[zinfo.filename] = zinfo
1063
def writestr(self, zinfo_or_arcname, bytes):
    """Write a file into the archive.  The contents is the string
    'bytes'.  'zinfo_or_arcname' is either a ZipInfo instance or
    the name of the file in the archive.

    When only a name is given, a ZipInfo is created with the current local
    time, the archive's default compression and permission bits 0600.

    Raises RuntimeError when the archive is already closed, plus whatever
    _writecheck() raises for an invalid member or archive state.
    """
    if not self.fp:
        raise RuntimeError(
            "Attempt to write to ZIP archive that was already closed")

    if not isinstance(zinfo_or_arcname, ZipInfo):
        zinfo = ZipInfo(filename=zinfo_or_arcname,
                        date_time=time.localtime(time.time())[:6])
        zinfo.compress_type = self.compression
        zinfo.external_attr = 0x180 << 16       # octal 0600: rw-------
    else:
        zinfo = zinfo_or_arcname

    zinfo.file_size = len(bytes)            # Uncompressed size
    zinfo.header_offset = self.fp.tell()    # Start of header bytes
    self._writecheck(zinfo)
    self._didModify = True
    zinfo.CRC = crc32(bytes) & 0xffffffff   # CRC-32 checksum
    if zinfo.compress_type == ZIP_DEFLATED:
        co = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                              zlib.DEFLATED, -15)
        bytes = co.compress(bytes) + co.flush()
        zinfo.compress_size = len(bytes)    # Compressed size
    else:
        zinfo.compress_size = zinfo.file_size
    self.fp.write(zinfo.FileHeader())
    self.fp.write(bytes)
    self.fp.flush()
    if zinfo.flag_bits & 0x08:
        # Write CRC and file sizes after the file data.  The CRC was
        # masked to an unsigned 32-bit value above, so it must be packed
        # as unsigned ("L"), the same way write() packs it.
        self.fp.write(struct.pack("<LLL", zinfo.CRC, zinfo.compress_size,
                                  zinfo.file_size))
    self.filelist.append(zinfo)
    self.NameToInfo[zinfo.filename] = zinfo
1102
1103 def __del__(self):
Fred Drake484d7352000-10-02 21:14:52 +00001104 """Call the "close()" method in case the user forgot."""
Tim Petersd15f8bb2001-11-28 23:16:40 +00001105 self.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001106
def close(self):
    """Close the file, and for mode "w" and "a" write the ending
    records.

    Does nothing when the archive is already closed (self.fp is None).
    When the archive was modified in mode "w" or "a", the central
    directory, the ZIP64 end records (only if the central directory starts
    beyond ZIP64_LIMIT) and the end-of-archive record are written first.
    An archive comment longer than ZIP_MAX_COMMENT is truncated.

    The underlying file is closed only if this object opened it, and
    self.fp is reset to None even when writing the records fails.
    """
    if self.fp is None:
        return

    try:
        if self.mode in ("w", "a") and self._didModify: # write ending records
            count = 0
            pos1 = self.fp.tell()
            for zinfo in self.filelist:         # write central directory
                count = count + 1
                dt = zinfo.date_time
                dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
                dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
                # Values too large for the 32-bit fields go into a ZIP64
                # extra field; the fixed field then holds 0xffffffff.
                extra = []
                if zinfo.file_size > ZIP64_LIMIT \
                        or zinfo.compress_size > ZIP64_LIMIT:
                    extra.append(zinfo.file_size)
                    extra.append(zinfo.compress_size)
                    file_size = 0xffffffff
                    compress_size = 0xffffffff
                else:
                    file_size = zinfo.file_size
                    compress_size = zinfo.compress_size

                if zinfo.header_offset > ZIP64_LIMIT:
                    extra.append(zinfo.header_offset)
                    header_offset = 0xffffffff
                else:
                    header_offset = zinfo.header_offset

                extra_data = zinfo.extra
                if extra:
                    # Append a ZIP64 field to the extra's
                    extra_data = struct.pack(
                        '<HH' + 'Q'*len(extra),
                        1, 8*len(extra), *extra) + extra_data

                    extract_version = max(45, zinfo.extract_version)
                    create_version = max(45, zinfo.create_version)
                else:
                    extract_version = zinfo.extract_version
                    create_version = zinfo.create_version

                try:
                    filename, flag_bits = zinfo._encodeFilenameFlags()
                    centdir = struct.pack(structCentralDir,
                        stringCentralDir, create_version,
                        zinfo.create_system, extract_version, zinfo.reserved,
                        flag_bits, zinfo.compress_type, dostime, dosdate,
                        zinfo.CRC, compress_size, file_size,
                        len(filename), len(extra_data), len(zinfo.comment),
                        0, zinfo.internal_attr, zinfo.external_attr,
                        header_offset)
                except DeprecationWarning:
                    # Debugging aid: dump the values handed to struct.pack
                    # before letting the warning-as-error propagate.
                    diagnostic = (structCentralDir,
                        stringCentralDir, create_version,
                        zinfo.create_system, extract_version, zinfo.reserved,
                        zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
                        zinfo.CRC, compress_size, file_size,
                        len(zinfo.filename), len(extra_data), len(zinfo.comment),
                        0, zinfo.internal_attr, zinfo.external_attr,
                        header_offset)
                    sys.stderr.write("%s\n" % (diagnostic,))
                    raise
                self.fp.write(centdir)
                self.fp.write(filename)
                self.fp.write(extra_data)
                self.fp.write(zinfo.comment)

            pos2 = self.fp.tell()
            # Write end-of-zip-archive record
            centDirOffset = pos1
            if pos1 > ZIP64_LIMIT:
                # Need to write the ZIP64 end-of-archive records
                zip64endrec = struct.pack(
                    structEndArchive64, stringEndArchive64,
                    44, 45, 45, 0, 0, count, count, pos2 - pos1, pos1)
                self.fp.write(zip64endrec)

                zip64locrec = struct.pack(
                    structEndArchive64Locator,
                    stringEndArchive64Locator, 0, pos2, 1)
                self.fp.write(zip64locrec)
                centDirOffset = 0xFFFFFFFF

            # check for valid comment length
            if len(self.comment) > ZIP_MAX_COMMENT:
                if self.debug > 0:
                    print('Archive comment is too long; truncating to %d bytes'
                          % ZIP_MAX_COMMENT)
                self.comment = self.comment[:ZIP_MAX_COMMENT]

            endrec = struct.pack(structEndArchive, stringEndArchive,
                                 0, 0, count % ZIP_FILECOUNT_LIMIT,
                                 count % ZIP_FILECOUNT_LIMIT, pos2 - pos1,
                                 centDirOffset, len(self.comment))
            self.fp.write(endrec)
            self.fp.write(self.comment)
            self.fp.flush()
    finally:
        # Always release the file, even if writing the records failed, so
        # a later close() or __del__ does not retry on a broken archive.
        fp = self.fp
        self.fp = None
        if not self._filePassed:
            fp.close()
1210
1211
class PyZipFile(ZipFile):
    """ZipFile subclass for archiving compiled Python modules and packages."""

    def writepy(self, pathname, basename = ""):
        """Add all files from "pathname" to the ZIP archive.

        Three cases are handled:

        * pathname is a package directory (it contains __init__.py): the
          package's modules are added under basename/<package name>, and
          sub-directories that are packages themselves are added
          recursively.
        * pathname is any other directory: each *.py file directly inside
          it is added under basename.
        * otherwise pathname must end in ".py" and that single module is
          added; anything else raises RuntimeError.

        What is stored is always the module.pyo or module.pyc chosen by
        _get_codename(), which compiles module.py when necessary.
        """
        if not os.path.isdir(pathname):
            # A single module file.
            if pathname[-3:] != ".py":
                raise RuntimeError(
                    'Files added with writepy() must end with ".py"')
            fname, arcname = self._get_codename(pathname[0:-3], basename)
            if self.debug:
                print("Adding file %s" % (arcname,))
            self.write(fname, arcname)
            return

        init_py = os.path.join(pathname, "__init__.py")
        if not os.path.isfile(init_py):
            # This is NOT a package directory, add its files at top level
            if self.debug:
                print("Adding files from directory %s" % (pathname,))
            for entry in os.listdir(pathname):
                if os.path.splitext(entry)[1] == ".py":
                    module_path = os.path.join(pathname, entry)
                    fname, arcname = self._get_codename(module_path[0:-3],
                                                        basename)
                    if self.debug:
                        print("Adding %s" % (arcname,))
                    self.write(fname, arcname)
            return

        # This is a package directory, add it
        package = os.path.split(pathname)[1]
        if basename:
            basename = "%s/%s" % (basename, package)
        else:
            basename = package
        if self.debug:
            print("Adding package in %s as %s" % (pathname, basename))
        fname, arcname = self._get_codename(init_py[0:-3], basename)
        if self.debug:
            print("Adding %s" % (arcname,))
        self.write(fname, arcname)

        entries = os.listdir(pathname)
        entries.remove("__init__.py")       # already added above
        # Add all *.py files and package subdirectories
        for entry in entries:
            child = os.path.join(pathname, entry)
            if os.path.isdir(child):
                if os.path.isfile(os.path.join(child, "__init__.py")):
                    # This is a package directory, add it
                    self.writepy(child, basename)  # Recursive call
            elif os.path.splitext(entry)[1] == ".py":
                fname, arcname = self._get_codename(child[0:-3], basename)
                if self.debug:
                    print("Adding %s" % (arcname,))
                self.write(fname, arcname)

    def _get_codename(self, pathname, basename):
        """Return (filename, archivename) for the path.

        'pathname' is a module path without extension.  The .pyo file is
        chosen when it exists and is at least as new as the .py source;
        otherwise the .pyc file is chosen, after (re)compiling it when it
        is missing or older than the source.  A compile error is printed
        and the .pyc path is still returned.  'archivename' is the chosen
        file's base name, prefixed with "basename/" when basename is not
        empty.
        """
        source = pathname + ".py"
        compiled = pathname + ".pyc"
        optimized = pathname + ".pyo"

        def mtime(path):
            return os.stat(path).st_mtime

        if os.path.isfile(optimized) and mtime(optimized) >= mtime(source):
            chosen = optimized              # Use .pyo file
        else:
            chosen = compiled
            if not os.path.isfile(compiled) or \
                    mtime(compiled) < mtime(source):
                import py_compile
                if self.debug:
                    print("Compiling %s" % (source,))
                try:
                    py_compile.compile(source, compiled, None, True)
                except py_compile.PyCompileError:
                    err = sys.exc_info()[1]
                    print(err.msg)

        archivename = os.path.split(chosen)[1]
        if basename:
            archivename = "%s/%s" % (basename, archivename)
        return (chosen, archivename)
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001309
1310
def main(args = None):
    """Command-line interface: list, test, extract or create a zipfile.

    'args' is the argument list without the program name and defaults to
    sys.argv[1:].  The first argument selects the action (-l, -t, -e or
    -c); an unknown action or a wrong number of arguments prints the
    usage text and exits with status 1.
    """
    import textwrap
    USAGE=textwrap.dedent("""\
        Usage:
            zipfile.py -l zipfile.zip # Show listing of a zipfile
            zipfile.py -t zipfile.zip # Test if a zipfile is valid
            zipfile.py -e zipfile.zip target # Extract zipfile into target dir
            zipfile.py -c zipfile.zip src ... # Create zipfile from sources
        """)
    if args is None:
        args = sys.argv[1:]

    if not args or args[0] not in ('-l', '-c', '-e', '-t'):
        print(USAGE)
        sys.exit(1)

    if args[0] == '-l':
        if len(args) != 2:
            print(USAGE)
            sys.exit(1)
        zf = ZipFile(args[1], 'r')
        try:
            zf.printdir()
        finally:
            zf.close()

    elif args[0] == '-t':
        if len(args) != 2:
            print(USAGE)
            sys.exit(1)
        zf = ZipFile(args[1], 'r')
        try:
            badfile = zf.testzip()
        finally:
            zf.close()
        # testzip() returns the first corrupt member's name, or None.
        if badfile:
            print("The following enclosed file is corrupted: %r" % (badfile,))
        print("Done testing")

    elif args[0] == '-e':
        if len(args) != 3:
            print(USAGE)
            sys.exit(1)

        zf = ZipFile(args[1], 'r')
        out = args[2]
        try:
            for path in zf.namelist():
                if path.startswith('./'):
                    tgt = os.path.join(out, path[2:])
                else:
                    tgt = os.path.join(out, path)

                tgtdir = os.path.dirname(tgt)
                # tgtdir is '' for a bare file name; makedirs('') fails.
                if tgtdir and not os.path.exists(tgtdir):
                    os.makedirs(tgtdir)
                fp = open(tgt, 'wb')
                try:
                    fp.write(zf.read(path))
                finally:
                    fp.close()
        finally:
            zf.close()

    elif args[0] == '-c':
        if len(args) < 3:
            print(USAGE)
            sys.exit(1)

        def addToZip(zf, path, zippath):
            """Add a file, or a directory tree recursively, as 'zippath'."""
            if os.path.isfile(path):
                zf.write(path, zippath, ZIP_DEFLATED)
            elif os.path.isdir(path):
                for nm in os.listdir(path):
                    addToZip(zf,
                             os.path.join(path, nm), os.path.join(zippath, nm))
            # else: ignore

        zf = ZipFile(args[1], 'w', allowZip64=True)
        try:
            for src in args[2:]:
                addToZip(zf, src, os.path.basename(src))
        finally:
            zf.close()

if __name__ == "__main__":
    main()