blob: c7a5cb15bd42db15a3ab4b3c2053538c77f86de6 [file] [log] [blame]
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001"""
2Read and write ZIP files.
3"""
Georg Brandl62416bc2008-01-07 18:47:44 +00004import struct, os, time, sys, shutil
Ronald Oussoren143cefb2006-06-15 08:14:18 +00005import binascii, cStringIO
Guido van Rossum32abe6f2000-03-31 17:30:02 +00006
7try:
Tim Peterse1190062001-01-15 03:34:38 +00008 import zlib # We may need its compression method
Gregory P. Smithb89a0962008-03-19 01:46:10 +00009 crc32 = zlib.crc32
Guido van Rossum9c673f32001-04-10 15:37:12 +000010except ImportError:
Guido van Rossum32abe6f2000-03-31 17:30:02 +000011 zlib = None
Gregory P. Smithb89a0962008-03-19 01:46:10 +000012 crc32 = binascii.crc32
Guido van Rossum32abe6f2000-03-31 17:30:02 +000013
Skip Montanaro40fc1602001-03-01 04:27:19 +000014__all__ = ["BadZipfile", "error", "ZIP_STORED", "ZIP_DEFLATED", "is_zipfile",
Ronald Oussoren143cefb2006-06-15 08:14:18 +000015 "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile" ]
Skip Montanaro40fc1602001-03-01 04:27:19 +000016
Fred Drake5db246d2000-09-29 20:44:48 +000017class BadZipfile(Exception):
Guido van Rossum32abe6f2000-03-31 17:30:02 +000018 pass
Ronald Oussoren143cefb2006-06-15 08:14:18 +000019
20
21class LargeZipFile(Exception):
Tim Petersa608bb22006-06-15 18:06:29 +000022 """
Ronald Oussoren143cefb2006-06-15 08:14:18 +000023 Raised when writing a zipfile, the zipfile requires ZIP64 extensions
24 and those extensions are disabled.
25 """
26
Tim Peterse1190062001-01-15 03:34:38 +000027error = BadZipfile # The exception raised by this module
Guido van Rossum32abe6f2000-03-31 17:30:02 +000028
Ronald Oussoren143cefb2006-06-15 08:14:18 +000029ZIP64_LIMIT= (1 << 31) - 1
Martin v. Löwis8c436412008-07-03 12:51:14 +000030ZIP_FILECOUNT_LIMIT = 1 << 16
31ZIP_MAX_COMMENT = (1 << 16) - 1
Ronald Oussoren143cefb2006-06-15 08:14:18 +000032
Guido van Rossum32abe6f2000-03-31 17:30:02 +000033# constants for Zip file compression methods
34ZIP_STORED = 0
35ZIP_DEFLATED = 8
36# Other ZIP compression methods not supported
37
Martin v. Löwis8c436412008-07-03 12:51:14 +000038# Below are some formats and associated data for reading/writing headers using
39# the struct module. The names and structures of headers/records are those used
40# in the PKWARE description of the ZIP file format:
41# http://www.pkware.com/documents/casestudies/APPNOTE.TXT
42# (URL valid as of January 2008)
Ronald Oussoren143cefb2006-06-15 08:14:18 +000043
Martin v. Löwis8c436412008-07-03 12:51:14 +000044# The "end of central directory" structure, magic number, size, and indices
45# (section V.I in the format document)
46structEndCentDir = "<4s4H2LH"
47magicEndCentDir = "PK\005\006"
48sizeEndCentDir = struct.calcsize(structEndCentDir)
49
50_ECD_SIGNATURE = 0
51_ECD_DISK_NUMBER = 1
52_ECD_DISK_START = 2
53_ECD_ENTRIES_THIS_DISK = 3
54_ECD_ENTRIES_TOTAL = 4
55_ECD_SIZE = 5
56_ECD_OFFSET = 6
57_ECD_COMMENT_SIZE = 7
58# These last two indices are not part of the structure as defined in the
59# spec, but they are used internally by this module as a convenience
60_ECD_COMMENT = 8
61_ECD_LOCATION = 9
62
63# The "central directory" structure, magic number, size, and indices
64# of entries in the structure (section V.F in the format document)
65structCentralDir = "<4s4B4HL2L5H2L"
66magicCentralDir = "PK\001\002"
67sizeCentralDir = struct.calcsize(structCentralDir)
68
69# The "local file header" structure, magic number, size, and indices
70# (section V.A in the format document)
71structFileHeader = "<4s2B4HL2L2H"
72magicFileHeader = "PK\003\004"
73sizeFileHeader = struct.calcsize(structFileHeader)
74
75# The "Zip64 end of central directory locator" structure, magic number, and size
76structEndCentDir64Locator = "<4sLQL"
77magicEndCentDir64Locator = "PK\x06\x07"
78sizeEndCentDir64Locator = struct.calcsize(structEndCentDir64Locator)
79
80# The "Zip64 end of central directory" record, magic number, size, and indices
81# (section V.G in the format document)
82structEndCentDir64 = "<4sQ2H2L4Q"
83magicEndCentDir64 = "PK\x06\x06"
84sizeEndCentDir64 = struct.calcsize(structEndCentDir64)
85
86_CD64_SIGNATURE = 0
87_CD64_DIRECTORY_RECSIZE = 1
88_CD64_CREATE_VERSION = 2
89_CD64_EXTRACT_VERSION = 3
90_CD64_DISK_NUMBER = 4
91_CD64_DISK_NUMBER_START = 5
92_CD64_NUMBER_ENTRIES_THIS_DISK = 6
93_CD64_NUMBER_ENTRIES_TOTAL = 7
94_CD64_DIRECTORY_SIZE = 8
95_CD64_OFFSET_START_CENTDIR = 9
Guido van Rossum32abe6f2000-03-31 17:30:02 +000096
Fred Drake3e038e52001-02-28 17:56:26 +000097# indexes of entries in the central directory structure
98_CD_SIGNATURE = 0
99_CD_CREATE_VERSION = 1
100_CD_CREATE_SYSTEM = 2
101_CD_EXTRACT_VERSION = 3
Martin v. Löwis8c436412008-07-03 12:51:14 +0000102_CD_EXTRACT_SYSTEM = 4
Fred Drake3e038e52001-02-28 17:56:26 +0000103_CD_FLAG_BITS = 5
104_CD_COMPRESS_TYPE = 6
105_CD_TIME = 7
106_CD_DATE = 8
107_CD_CRC = 9
108_CD_COMPRESSED_SIZE = 10
109_CD_UNCOMPRESSED_SIZE = 11
110_CD_FILENAME_LENGTH = 12
111_CD_EXTRA_FIELD_LENGTH = 13
112_CD_COMMENT_LENGTH = 14
113_CD_DISK_NUMBER_START = 15
114_CD_INTERNAL_FILE_ATTRIBUTES = 16
115_CD_EXTERNAL_FILE_ATTRIBUTES = 17
116_CD_LOCAL_HEADER_OFFSET = 18
117
Martin v. Löwis8c436412008-07-03 12:51:14 +0000118# The "local file header" structure, magic number, size, and indices
119# (section V.A in the format document)
120structFileHeader = "<4s2B4HL2L2H"
121magicFileHeader = "PK\003\004"
122sizeFileHeader = struct.calcsize(structFileHeader)
123
Fred Drake3e038e52001-02-28 17:56:26 +0000124_FH_SIGNATURE = 0
125_FH_EXTRACT_VERSION = 1
Martin v. Löwis8c436412008-07-03 12:51:14 +0000126_FH_EXTRACT_SYSTEM = 2
Fred Drake3e038e52001-02-28 17:56:26 +0000127_FH_GENERAL_PURPOSE_FLAG_BITS = 3
128_FH_COMPRESSION_METHOD = 4
129_FH_LAST_MOD_TIME = 5
130_FH_LAST_MOD_DATE = 6
131_FH_CRC = 7
132_FH_COMPRESSED_SIZE = 8
133_FH_UNCOMPRESSED_SIZE = 9
134_FH_FILENAME_LENGTH = 10
135_FH_EXTRA_FIELD_LENGTH = 11
136
Martin v. Löwis8c436412008-07-03 12:51:14 +0000137# The "Zip64 end of central directory locator" structure, magic number, and size
138structEndCentDir64Locator = "<4sLQL"
139magicEndCentDir64Locator = "PK\x06\x07"
140sizeEndCentDir64Locator = struct.calcsize(structEndCentDir64Locator)
141
142# The "Zip64 end of central directory" record, magic number, size, and indices
143# (section V.G in the format document)
144structEndCentDir64 = "<4sQ2H2L4Q"
145magicEndCentDir64 = "PK\x06\x06"
146sizeEndCentDir64 = struct.calcsize(structEndCentDir64)
147
148_CD64_SIGNATURE = 0
149_CD64_DIRECTORY_RECSIZE = 1
150_CD64_CREATE_VERSION = 2
151_CD64_EXTRACT_VERSION = 3
152_CD64_DISK_NUMBER = 4
153_CD64_DISK_NUMBER_START = 5
154_CD64_NUMBER_ENTRIES_THIS_DISK = 6
155_CD64_NUMBER_ENTRIES_TOTAL = 7
156_CD64_DIRECTORY_SIZE = 8
157_CD64_OFFSET_START_CENTDIR = 9
158
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000159def is_zipfile(filename):
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000160 """Quickly see if file is a ZIP file by checking the magic number."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000161 try:
162 fpin = open(filename, "rb")
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000163 endrec = _EndRecData(fpin)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000164 fpin.close()
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000165 if endrec:
Guido van Rossum8ca162f2002-04-07 06:36:23 +0000166 return True # file has correct magic number
Fred Drake7e473802001-05-11 19:52:57 +0000167 except IOError:
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000168 pass
Guido van Rossum8ca162f2002-04-07 06:36:23 +0000169 return False
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000170
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000171def _EndRecData64(fpin, offset, endrec):
172 """
173 Read the ZIP64 end-of-archive records and use that to update endrec
174 """
Martin v. Löwis8c436412008-07-03 12:51:14 +0000175 fpin.seek(offset - sizeEndCentDir64Locator, 2)
176 data = fpin.read(sizeEndCentDir64Locator)
177 sig, diskno, reloff, disks = struct.unpack(structEndCentDir64Locator, data)
178 if sig != magicEndCentDir64Locator:
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000179 return endrec
180
181 if diskno != 0 or disks != 1:
182 raise BadZipfile("zipfiles that span multiple disks are not supported")
183
Tim Petersa608bb22006-06-15 18:06:29 +0000184 # Assume no 'zip64 extensible data'
Martin v. Löwis8c436412008-07-03 12:51:14 +0000185 fpin.seek(offset - sizeEndCentDir64Locator - sizeEndCentDir64, 2)
186 data = fpin.read(sizeEndCentDir64)
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000187 sig, sz, create_version, read_version, disk_num, disk_dir, \
188 dircount, dircount2, dirsize, diroffset = \
Martin v. Löwis8c436412008-07-03 12:51:14 +0000189 struct.unpack(structEndCentDir64, data)
190 if sig != magicEndCentDir64:
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000191 return endrec
192
193 # Update the original endrec using data from the ZIP64 record
Martin v. Löwis8c436412008-07-03 12:51:14 +0000194 endrec[_ECD_DISK_NUMBER] = disk_num
195 endrec[_ECD_DISK_START] = disk_dir
196 endrec[_ECD_ENTRIES_THIS_DISK] = dircount
197 endrec[_ECD_ENTRIES_TOTAL] = dircount2
198 endrec[_ECD_SIZE] = dirsize
199 endrec[_ECD_OFFSET] = diroffset
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000200 return endrec
201
202
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000203def _EndRecData(fpin):
204 """Return data from the "End of Central Directory" record, or None.
205
206 The data is a list of the nine items in the ZIP "End of central dir"
207 record followed by a tenth item, the file seek offset of this record."""
Martin v. Löwis8c436412008-07-03 12:51:14 +0000208
209 # Determine file size
210 fpin.seek(0, 2)
211 filesize = fpin.tell()
212
213 # Check to see if this is ZIP file with no archive comment (the
214 # "end of central directory" structure should be the last item in the
215 # file if this is the case).
216 fpin.seek(-sizeEndCentDir, 2)
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000217 data = fpin.read()
Martin v. Löwis8c436412008-07-03 12:51:14 +0000218 if data[0:4] == magicEndCentDir and data[-2:] == "\000\000":
219 # the signature is correct and there's no comment, unpack structure
220 endrec = struct.unpack(structEndCentDir, data)
221 endrec=list(endrec)
222
223 # Append a blank comment and record start offset
224 endrec.append("")
225 endrec.append(filesize - sizeEndCentDir)
226 if endrec[_ECD_OFFSET] == 0xffffffff:
227 # the value for the "offset of the start of the central directory"
228 # indicates that there is a "Zip64 end of central directory"
229 # structure present, so go look for it
230 return _EndRecData64(fpin, -sizeEndCentDir, endrec)
231
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000232 return endrec
Martin v. Löwis8c436412008-07-03 12:51:14 +0000233
234 # Either this is not a ZIP file, or it is a ZIP file with an archive
235 # comment. Search the end of the file for the "end of central directory"
236 # record signature. The comment is the last item in the ZIP file and may be
237 # up to 64K long. It is assumed that the "end of central directory" magic
238 # number does not appear in the comment.
239 maxCommentStart = max(filesize - (1 << 16) - sizeEndCentDir, 0)
240 fpin.seek(maxCommentStart, 0)
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000241 data = fpin.read()
Martin v. Löwis8c436412008-07-03 12:51:14 +0000242 start = data.rfind(magicEndCentDir)
243 if start >= 0:
244 # found the magic number; attempt to unpack and interpret
245 recData = data[start:start+sizeEndCentDir]
246 endrec = list(struct.unpack(structEndCentDir, recData))
247 comment = data[start+sizeEndCentDir:]
248 # check that comment length is correct
249 if endrec[_ECD_COMMENT_SIZE] == len(comment):
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000250 # Append the archive comment and start offset
251 endrec.append(comment)
Martin v. Löwis8c436412008-07-03 12:51:14 +0000252 endrec.append(maxCommentStart + start)
253 if endrec[_ECD_OFFSET] == 0xffffffff:
254 # There is apparently a "Zip64 end of central directory"
255 # structure present, so go look for it
256 return _EndRecData64(fpin, start - filesize, endrec)
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000257 return endrec
Martin v. Löwis8c436412008-07-03 12:51:14 +0000258
259 # Unable to find a valid end of central directory structure
260 return
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000261
Fred Drake484d7352000-10-02 21:14:52 +0000262
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000263class ZipInfo (object):
Fred Drake484d7352000-10-02 21:14:52 +0000264 """Class with attributes describing each file in the ZIP archive."""
265
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000266 __slots__ = (
267 'orig_filename',
268 'filename',
269 'date_time',
270 'compress_type',
271 'comment',
272 'extra',
273 'create_system',
274 'create_version',
275 'extract_version',
276 'reserved',
277 'flag_bits',
278 'volume',
279 'internal_attr',
280 'external_attr',
281 'header_offset',
282 'CRC',
283 'compress_size',
284 'file_size',
Gregory P. Smith0c63fc22008-01-20 01:21:03 +0000285 '_raw_time',
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000286 )
287
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000288 def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
Greg Ward8e36d282003-06-18 00:53:06 +0000289 self.orig_filename = filename # Original file name in archive
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000290
291 # Terminate the file name at the first null byte. Null bytes in file
292 # names are used as tricks by viruses in archives.
Greg Ward8e36d282003-06-18 00:53:06 +0000293 null_byte = filename.find(chr(0))
294 if null_byte >= 0:
295 filename = filename[0:null_byte]
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000296 # This is used to ensure paths in generated ZIP files always use
297 # forward slashes as the directory separator, as required by the
298 # ZIP format specification.
299 if os.sep != "/" and os.sep in filename:
Greg Ward8e36d282003-06-18 00:53:06 +0000300 filename = filename.replace(os.sep, "/")
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000301
Greg Ward8e36d282003-06-18 00:53:06 +0000302 self.filename = filename # Normalized file name
Tim Peterse1190062001-01-15 03:34:38 +0000303 self.date_time = date_time # year, month, day, hour, min, sec
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000304 # Standard values:
Tim Peterse1190062001-01-15 03:34:38 +0000305 self.compress_type = ZIP_STORED # Type of compression for the file
306 self.comment = "" # Comment for each file
307 self.extra = "" # ZIP extra data
Martin v. Löwis00756902006-02-05 17:09:41 +0000308 if sys.platform == 'win32':
309 self.create_system = 0 # System which created ZIP archive
310 else:
311 # Assume everything else is unix-y
312 self.create_system = 3 # System which created ZIP archive
Tim Peterse1190062001-01-15 03:34:38 +0000313 self.create_version = 20 # Version which created ZIP archive
314 self.extract_version = 20 # Version needed to extract archive
315 self.reserved = 0 # Must be zero
316 self.flag_bits = 0 # ZIP flag bits
317 self.volume = 0 # Volume number of file header
318 self.internal_attr = 0 # Internal attributes
319 self.external_attr = 0 # External file attributes
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000320 # Other attributes are set by class ZipFile:
Tim Peterse1190062001-01-15 03:34:38 +0000321 # header_offset Byte offset to the file header
Tim Peterse1190062001-01-15 03:34:38 +0000322 # CRC CRC-32 of the uncompressed file
323 # compress_size Size of the compressed file
324 # file_size Size of the uncompressed file
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000325
326 def FileHeader(self):
Fred Drake484d7352000-10-02 21:14:52 +0000327 """Return the per-file header as a string."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000328 dt = self.date_time
329 dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
Tim Peters3caca232001-12-06 06:23:26 +0000330 dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000331 if self.flag_bits & 0x08:
Tim Peterse1190062001-01-15 03:34:38 +0000332 # Set these to zero because we write them after the file data
333 CRC = compress_size = file_size = 0
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000334 else:
Tim Peterse1190062001-01-15 03:34:38 +0000335 CRC = self.CRC
336 compress_size = self.compress_size
337 file_size = self.file_size
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000338
339 extra = self.extra
340
341 if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
342 # File is larger than what fits into a 4 byte integer,
343 # fall back to the ZIP64 extension
Gregory P. Smithbf02e3b2008-03-19 03:14:41 +0000344 fmt = '<HHQQ'
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000345 extra = extra + struct.pack(fmt,
346 1, struct.calcsize(fmt)-4, file_size, compress_size)
Martin v. Löwis8c436412008-07-03 12:51:14 +0000347 file_size = 0xffffffff
348 compress_size = 0xffffffff
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000349 self.extract_version = max(45, self.extract_version)
350 self.create_version = max(45, self.extract_version)
351
Martin v. Löwis471617d2008-05-05 17:16:58 +0000352 filename, flag_bits = self._encodeFilenameFlags()
Martin v. Löwis8c436412008-07-03 12:51:14 +0000353 header = struct.pack(structFileHeader, magicFileHeader,
Martin v. Löwis471617d2008-05-05 17:16:58 +0000354 self.extract_version, self.reserved, flag_bits,
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000355 self.compress_type, dostime, dosdate, CRC,
356 compress_size, file_size,
Martin v. Löwis471617d2008-05-05 17:16:58 +0000357 len(filename), len(extra))
358 return header + filename + extra
359
360 def _encodeFilenameFlags(self):
361 if isinstance(self.filename, unicode):
362 try:
363 return self.filename.encode('ascii'), self.flag_bits
364 except UnicodeEncodeError:
365 return self.filename.encode('utf-8'), self.flag_bits | 0x800
366 else:
367 return self.filename, self.flag_bits
368
369 def _decodeFilename(self):
370 if self.flag_bits & 0x800:
371 return self.filename.decode('utf-8')
372 else:
373 return self.filename
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000374
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000375 def _decodeExtra(self):
376 # Try to decode the extra field.
377 extra = self.extra
378 unpack = struct.unpack
379 while extra:
Gregory P. Smithbf02e3b2008-03-19 03:14:41 +0000380 tp, ln = unpack('<HH', extra[:4])
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000381 if tp == 1:
382 if ln >= 24:
Gregory P. Smithbf02e3b2008-03-19 03:14:41 +0000383 counts = unpack('<QQQ', extra[4:28])
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000384 elif ln == 16:
Gregory P. Smithbf02e3b2008-03-19 03:14:41 +0000385 counts = unpack('<QQ', extra[4:20])
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000386 elif ln == 8:
Gregory P. Smithbf02e3b2008-03-19 03:14:41 +0000387 counts = unpack('<Q', extra[4:12])
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000388 elif ln == 0:
389 counts = ()
390 else:
391 raise RuntimeError, "Corrupt extra field %s"%(ln,)
392
393 idx = 0
394
395 # ZIP64 extension (large files and/or large archives)
Gregory P. Smithbf02e3b2008-03-19 03:14:41 +0000396 if self.file_size in (0xffffffffffffffffL, 0xffffffffL):
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000397 self.file_size = counts[idx]
398 idx += 1
399
Martin v. Löwis8c436412008-07-03 12:51:14 +0000400 if self.compress_size == 0xFFFFFFFFL:
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000401 self.compress_size = counts[idx]
402 idx += 1
403
Martin v. Löwis8c436412008-07-03 12:51:14 +0000404 if self.header_offset == 0xffffffffL:
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000405 old = self.header_offset
406 self.header_offset = counts[idx]
407 idx+=1
408
409 extra = extra[ln+4:]
Tim Petersa608bb22006-06-15 18:06:29 +0000410
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000411
Martin v. Löwisc6d626e2007-02-13 09:49:38 +0000412class _ZipDecrypter:
413 """Class to handle decryption of files stored within a ZIP archive.
414
415 ZIP supports a password-based form of encryption. Even though known
416 plaintext attacks have been found against it, it is still useful
Gregory P. Smithda407232008-01-20 01:32:00 +0000417 to be able to get data out of such a file.
Martin v. Löwisc6d626e2007-02-13 09:49:38 +0000418
419 Usage:
420 zd = _ZipDecrypter(mypwd)
421 plain_char = zd(cypher_char)
422 plain_text = map(zd, cypher_text)
423 """
424
425 def _GenerateCRCTable():
426 """Generate a CRC-32 table.
427
428 ZIP encryption uses the CRC32 one-byte primitive for scrambling some
429 internal keys. We noticed that a direct implementation is faster than
430 relying on binascii.crc32().
431 """
432 poly = 0xedb88320
433 table = [0] * 256
434 for i in range(256):
435 crc = i
436 for j in range(8):
437 if crc & 1:
438 crc = ((crc >> 1) & 0x7FFFFFFF) ^ poly
439 else:
440 crc = ((crc >> 1) & 0x7FFFFFFF)
441 table[i] = crc
442 return table
443 crctable = _GenerateCRCTable()
444
445 def _crc32(self, ch, crc):
446 """Compute the CRC32 primitive on one byte."""
447 return ((crc >> 8) & 0xffffff) ^ self.crctable[(crc ^ ord(ch)) & 0xff]
448
449 def __init__(self, pwd):
450 self.key0 = 305419896
451 self.key1 = 591751049
452 self.key2 = 878082192
453 for p in pwd:
454 self._UpdateKeys(p)
455
456 def _UpdateKeys(self, c):
457 self.key0 = self._crc32(c, self.key0)
458 self.key1 = (self.key1 + (self.key0 & 255)) & 4294967295
459 self.key1 = (self.key1 * 134775813 + 1) & 4294967295
460 self.key2 = self._crc32(chr((self.key1 >> 24) & 255), self.key2)
461
462 def __call__(self, c):
463 """Decrypt a single character."""
464 c = ord(c)
465 k = self.key2 | 2
466 c = c ^ (((k * (k^1)) >> 8) & 255)
467 c = chr(c)
468 self._UpdateKeys(c)
469 return c
470
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000471class ZipExtFile:
472 """File-like object for reading an archive member.
Tim Petersea5962f2007-03-12 18:07:52 +0000473 Is returned by ZipFile.open().
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000474 """
Tim Petersea5962f2007-03-12 18:07:52 +0000475
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000476 def __init__(self, fileobj, zipinfo, decrypt=None):
477 self.fileobj = fileobj
478 self.decrypter = decrypt
479 self.bytes_read = 0L
480 self.rawbuffer = ''
481 self.readbuffer = ''
482 self.linebuffer = ''
483 self.eof = False
484 self.univ_newlines = False
485 self.nlSeps = ("\n", )
486 self.lastdiscard = ''
487
488 self.compress_type = zipinfo.compress_type
489 self.compress_size = zipinfo.compress_size
Tim Petersea5962f2007-03-12 18:07:52 +0000490
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000491 self.closed = False
492 self.mode = "r"
493 self.name = zipinfo.filename
494
495 # read from compressed files in 64k blocks
496 self.compreadsize = 64*1024
497 if self.compress_type == ZIP_DEFLATED:
498 self.dc = zlib.decompressobj(-15)
499
500 def set_univ_newlines(self, univ_newlines):
501 self.univ_newlines = univ_newlines
Tim Petersea5962f2007-03-12 18:07:52 +0000502
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000503 # pick line separator char(s) based on universal newlines flag
504 self.nlSeps = ("\n", )
505 if self.univ_newlines:
506 self.nlSeps = ("\r\n", "\r", "\n")
507
508 def __iter__(self):
509 return self
Tim Petersea5962f2007-03-12 18:07:52 +0000510
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000511 def next(self):
512 nextline = self.readline()
513 if not nextline:
514 raise StopIteration()
515
516 return nextline
517
518 def close(self):
519 self.closed = True
520
521 def _checkfornewline(self):
522 nl, nllen = -1, -1
523 if self.linebuffer:
524 # ugly check for cases where half of an \r\n pair was
525 # read on the last pass, and the \r was discarded. In this
526 # case we just throw away the \n at the start of the buffer.
527 if (self.lastdiscard, self.linebuffer[0]) == ('\r','\n'):
528 self.linebuffer = self.linebuffer[1:]
529
Tim Petersea5962f2007-03-12 18:07:52 +0000530 for sep in self.nlSeps:
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000531 nl = self.linebuffer.find(sep)
532 if nl >= 0:
533 nllen = len(sep)
534 return nl, nllen
535
536 return nl, nllen
Tim Petersea5962f2007-03-12 18:07:52 +0000537
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000538 def readline(self, size = -1):
539 """Read a line with approx. size. If size is negative,
Tim Petersea5962f2007-03-12 18:07:52 +0000540 read a whole line.
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000541 """
542 if size < 0:
543 size = sys.maxint
544 elif size == 0:
545 return ''
546
547 # check for a newline already in buffer
548 nl, nllen = self._checkfornewline()
Tim Petersea5962f2007-03-12 18:07:52 +0000549
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000550 if nl >= 0:
551 # the next line was already in the buffer
552 nl = min(nl, size)
553 else:
554 # no line break in buffer - try to read more
555 size -= len(self.linebuffer)
556 while nl < 0 and size > 0:
557 buf = self.read(min(size, 100))
558 if not buf:
559 break
560 self.linebuffer += buf
561 size -= len(buf)
562
563 # check for a newline in buffer
564 nl, nllen = self._checkfornewline()
Tim Petersea5962f2007-03-12 18:07:52 +0000565
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000566 # we either ran out of bytes in the file, or
567 # met the specified size limit without finding a newline,
568 # so return current buffer
569 if nl < 0:
570 s = self.linebuffer
571 self.linebuffer = ''
572 return s
573
574 buf = self.linebuffer[:nl]
575 self.lastdiscard = self.linebuffer[nl:nl + nllen]
576 self.linebuffer = self.linebuffer[nl + nllen:]
577
578 # line is always returned with \n as newline char (except possibly
579 # for a final incomplete line in the file, which is handled above).
580 return buf + "\n"
581
582 def readlines(self, sizehint = -1):
583 """Return a list with all (following) lines. The sizehint parameter
584 is ignored in this implementation.
585 """
586 result = []
587 while True:
588 line = self.readline()
589 if not line: break
590 result.append(line)
591 return result
592
593 def read(self, size = None):
594 # act like file() obj and return empty string if size is 0
595 if size == 0:
596 return ''
597
598 # determine read size
599 bytesToRead = self.compress_size - self.bytes_read
600
601 # adjust read size for encrypted files since the first 12 bytes
602 # are for the encryption/password information
603 if self.decrypter is not None:
604 bytesToRead -= 12
605
606 if size is not None and size >= 0:
607 if self.compress_type == ZIP_STORED:
608 lr = len(self.readbuffer)
609 bytesToRead = min(bytesToRead, size - lr)
610 elif self.compress_type == ZIP_DEFLATED:
611 if len(self.readbuffer) > size:
612 # the user has requested fewer bytes than we've already
613 # pulled through the decompressor; don't read any more
614 bytesToRead = 0
615 else:
616 # user will use up the buffer, so read some more
617 lr = len(self.rawbuffer)
618 bytesToRead = min(bytesToRead, self.compreadsize - lr)
619
620 # avoid reading past end of file contents
621 if bytesToRead + self.bytes_read > self.compress_size:
622 bytesToRead = self.compress_size - self.bytes_read
623
624 # try to read from file (if necessary)
625 if bytesToRead > 0:
626 bytes = self.fileobj.read(bytesToRead)
627 self.bytes_read += len(bytes)
628 self.rawbuffer += bytes
629
630 # handle contents of raw buffer
631 if self.rawbuffer:
632 newdata = self.rawbuffer
633 self.rawbuffer = ''
634
635 # decrypt new data if we were given an object to handle that
636 if newdata and self.decrypter is not None:
637 newdata = ''.join(map(self.decrypter, newdata))
638
639 # decompress newly read data if necessary
640 if newdata and self.compress_type == ZIP_DEFLATED:
641 newdata = self.dc.decompress(newdata)
642 self.rawbuffer = self.dc.unconsumed_tail
643 if self.eof and len(self.rawbuffer) == 0:
Tim Petersea5962f2007-03-12 18:07:52 +0000644 # we're out of raw bytes (both from the file and
645 # the local buffer); flush just to make sure the
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000646 # decompressor is done
647 newdata += self.dc.flush()
648 # prevent decompressor from being used again
649 self.dc = None
650
651 self.readbuffer += newdata
652
653
654 # return what the user asked for
655 if size is None or len(self.readbuffer) <= size:
656 bytes = self.readbuffer
657 self.readbuffer = ''
658 else:
659 bytes = self.readbuffer[:size]
660 self.readbuffer = self.readbuffer[size:]
661
662 return bytes
Tim Petersea5962f2007-03-12 18:07:52 +0000663
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000664
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000665class ZipFile:
Tim Petersa19a1682001-03-29 04:36:09 +0000666 """ Class with methods to open, read, write, close, list zip files.
667
Martin v. Löwis8c436412008-07-03 12:51:14 +0000668 z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=False)
Tim Petersa19a1682001-03-29 04:36:09 +0000669
Fred Drake3d9091e2001-03-26 15:49:24 +0000670 file: Either the path to the file, or a file-like object.
671 If it is a path, the file will be opened and closed by ZipFile.
672 mode: The mode can be either read "r", write "w" or append "a".
673 compression: ZIP_STORED (no compression) or ZIP_DEFLATED (requires zlib).
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000674 allowZip64: if True ZipFile will create files with ZIP64 extensions when
675 needed, otherwise it will raise an exception when this would
676 be necessary.
677
Fred Drake3d9091e2001-03-26 15:49:24 +0000678 """
Fred Drake484d7352000-10-02 21:14:52 +0000679
Fred Drake90eac282001-02-28 05:29:34 +0000680 fp = None # Set here since __del__ checks it
681
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000682 def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=False):
Fred Drake484d7352000-10-02 21:14:52 +0000683 """Open the ZIP file with mode read "r", write "w" or append "a"."""
Georg Brandl4b3ab6f2007-07-12 09:59:22 +0000684 if mode not in ("r", "w", "a"):
685 raise RuntimeError('ZipFile() requires mode "r", "w", or "a"')
686
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000687 if compression == ZIP_STORED:
688 pass
689 elif compression == ZIP_DEFLATED:
690 if not zlib:
691 raise RuntimeError,\
Fred Drake5db246d2000-09-29 20:44:48 +0000692 "Compression requires the (missing) zlib module"
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000693 else:
694 raise RuntimeError, "That compression method is not supported"
Georg Brandl4b3ab6f2007-07-12 09:59:22 +0000695
696 self._allowZip64 = allowZip64
697 self._didModify = False
Tim Peterse1190062001-01-15 03:34:38 +0000698 self.debug = 0 # Level of printing: 0 through 3
699 self.NameToInfo = {} # Find file info given name
700 self.filelist = [] # List of ZipInfo instances for archive
701 self.compression = compression # Method of compression
Raymond Hettinger2ca7c192005-02-16 09:27:49 +0000702 self.mode = key = mode.replace('b', '')[0]
Martin v. Löwisc6d626e2007-02-13 09:49:38 +0000703 self.pwd = None
Martin v. Löwis8c436412008-07-03 12:51:14 +0000704 self.comment = ''
Tim Petersa19a1682001-03-29 04:36:09 +0000705
Fred Drake3d9091e2001-03-26 15:49:24 +0000706 # Check if we were passed a file-like object
Walter Dörwald65230a22002-06-03 15:58:32 +0000707 if isinstance(file, basestring):
Fred Drake3d9091e2001-03-26 15:49:24 +0000708 self._filePassed = 0
709 self.filename = file
710 modeDict = {'r' : 'rb', 'w': 'wb', 'a' : 'r+b'}
Martin v. Löwis84f6de92007-02-13 10:10:39 +0000711 try:
712 self.fp = open(file, modeDict[mode])
713 except IOError:
714 if mode == 'a':
715 mode = key = 'w'
716 self.fp = open(file, modeDict[mode])
717 else:
718 raise
Fred Drake3d9091e2001-03-26 15:49:24 +0000719 else:
720 self._filePassed = 1
721 self.fp = file
722 self.filename = getattr(file, 'name', None)
Tim Petersa19a1682001-03-29 04:36:09 +0000723
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000724 if key == 'r':
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000725 self._GetContents()
726 elif key == 'w':
Fred Drake3d9091e2001-03-26 15:49:24 +0000727 pass
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000728 elif key == 'a':
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000729 try: # See if file is a zip file
730 self._RealGetContents()
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000731 # seek to start of directory and overwrite
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000732 self.fp.seek(self.start_dir, 0)
733 except BadZipfile: # file is not a zip file, just append
734 self.fp.seek(0, 2)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000735 else:
Tim Peters7d3bad62001-04-04 18:56:49 +0000736 if not self._filePassed:
737 self.fp.close()
738 self.fp = None
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000739 raise RuntimeError, 'Mode must be "r", "w" or "a"'
740
741 def _GetContents(self):
Tim Peters7d3bad62001-04-04 18:56:49 +0000742 """Read the directory, making sure we close the file if the format
743 is bad."""
744 try:
745 self._RealGetContents()
746 except BadZipfile:
747 if not self._filePassed:
748 self.fp.close()
749 self.fp = None
750 raise
751
752 def _RealGetContents(self):
Fred Drake484d7352000-10-02 21:14:52 +0000753 """Read in the table of contents for the ZIP file."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000754 fp = self.fp
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000755 endrec = _EndRecData(fp)
756 if not endrec:
757 raise BadZipfile, "File is not a zip file"
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000758 if self.debug > 1:
759 print endrec
Martin v. Löwis8c436412008-07-03 12:51:14 +0000760 size_cd = endrec[_ECD_SIZE] # bytes in central directory
761 offset_cd = endrec[_ECD_OFFSET] # offset of central directory
762 self.comment = endrec[_ECD_COMMENT] # archive comment
763
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000764 # "concat" is zero, unless zip was concatenated to another file
Martin v. Löwis8c436412008-07-03 12:51:14 +0000765 concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
766 if endrec[_ECD_LOCATION] > ZIP64_LIMIT:
767 # If the offset of the "End of Central Dir" record requires Zip64
768 # extension structures, account for them
769 concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)
770
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000771 if self.debug > 2:
Martin v. Löwis8c436412008-07-03 12:51:14 +0000772 inferred = concat + offset_cd
773 print "given, inferred, offset", offset_cd, inferred, concat
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000774 # self.start_dir: Position of start of central directory
775 self.start_dir = offset_cd + concat
776 fp.seek(self.start_dir, 0)
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000777 data = fp.read(size_cd)
778 fp = cStringIO.StringIO(data)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000779 total = 0
780 while total < size_cd:
Martin v. Löwis8c436412008-07-03 12:51:14 +0000781 centdir = fp.read(sizeCentralDir)
782 if centdir[0:4] != magicCentralDir:
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000783 raise BadZipfile, "Bad magic number for central directory"
784 centdir = struct.unpack(structCentralDir, centdir)
785 if self.debug > 2:
786 print centdir
Fred Drake3e038e52001-02-28 17:56:26 +0000787 filename = fp.read(centdir[_CD_FILENAME_LENGTH])
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000788 # Create ZipInfo instance to store file information
789 x = ZipInfo(filename)
Fred Drake3e038e52001-02-28 17:56:26 +0000790 x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
791 x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000792 x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000793 (x.create_version, x.create_system, x.extract_version, x.reserved,
794 x.flag_bits, x.compress_type, t, d,
795 x.CRC, x.compress_size, x.file_size) = centdir[1:12]
796 x.volume, x.internal_attr, x.external_attr = centdir[15:18]
797 # Convert date/time code to (year, month, day, hour, min, sec)
Gregory P. Smith0c63fc22008-01-20 01:21:03 +0000798 x._raw_time = t
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000799 x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
Fred Drake414ca662000-06-13 18:49:53 +0000800 t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000801
802 x._decodeExtra()
803 x.header_offset = x.header_offset + concat
Martin v. Löwis471617d2008-05-05 17:16:58 +0000804 x.filename = x._decodeFilename()
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000805 self.filelist.append(x)
806 self.NameToInfo[x.filename] = x
Martin v. Löwis8c436412008-07-03 12:51:14 +0000807
808 # update total bytes read from central directory
809 total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH]
810 + centdir[_CD_EXTRA_FIELD_LENGTH]
811 + centdir[_CD_COMMENT_LENGTH])
812
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000813 if self.debug > 2:
814 print "total", total
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000815
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000816
817 def namelist(self):
Fred Drake484d7352000-10-02 21:14:52 +0000818 """Return a list of file names in the archive."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000819 l = []
820 for data in self.filelist:
821 l.append(data.filename)
822 return l
823
824 def infolist(self):
Fred Drake484d7352000-10-02 21:14:52 +0000825 """Return a list of class ZipInfo instances for files in the
826 archive."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000827 return self.filelist
828
829 def printdir(self):
Fred Drake484d7352000-10-02 21:14:52 +0000830 """Print a table of contents for the zip file."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000831 print "%-46s %19s %12s" % ("File Name", "Modified ", "Size")
832 for zinfo in self.filelist:
Raymond Hettinger351e1a32008-01-14 22:58:05 +0000833 date = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time[:6]
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000834 print "%-46s %s %12d" % (zinfo.filename, date, zinfo.file_size)
835
836 def testzip(self):
Fred Drake484d7352000-10-02 21:14:52 +0000837 """Read all the files and check the CRC."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000838 for zinfo in self.filelist:
839 try:
Tim Peterse1190062001-01-15 03:34:38 +0000840 self.read(zinfo.filename) # Check CRC-32
Raymond Hettingerc0fac962003-06-27 22:25:03 +0000841 except BadZipfile:
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000842 return zinfo.filename
843
844 def getinfo(self, name):
Fred Drake484d7352000-10-02 21:14:52 +0000845 """Return the instance of ZipInfo given 'name'."""
Georg Brandl4b3ab6f2007-07-12 09:59:22 +0000846 info = self.NameToInfo.get(name)
847 if info is None:
848 raise KeyError(
849 'There is no item named %r in the archive' % name)
850
851 return info
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000852
Martin v. Löwisc6d626e2007-02-13 09:49:38 +0000853 def setpassword(self, pwd):
854 """Set default password for encrypted files."""
855 self.pwd = pwd
856
857 def read(self, name, pwd=None):
Fred Drake484d7352000-10-02 21:14:52 +0000858 """Return file bytes (as a string) for name."""
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000859 return self.open(name, "r", pwd).read()
860
861 def open(self, name, mode="r", pwd=None):
862 """Return file-like object for 'name'."""
863 if mode not in ("r", "U", "rU"):
864 raise RuntimeError, 'open() requires mode "r", "U", or "rU"'
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000865 if not self.fp:
866 raise RuntimeError, \
Fred Drake5db246d2000-09-29 20:44:48 +0000867 "Attempt to read ZIP archive that was already closed"
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000868
Tim Petersea5962f2007-03-12 18:07:52 +0000869 # Only open a new file for instances where we were not
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000870 # given a file object in the constructor
871 if self._filePassed:
872 zef_file = self.fp
873 else:
874 zef_file = open(self.filename, 'rb')
875
Georg Brandl112aa502008-05-20 08:25:48 +0000876 # Make sure we have an info object
877 if isinstance(name, ZipInfo):
878 # 'name' is already an info object
879 zinfo = name
880 else:
881 # Get info object for name
882 zinfo = self.getinfo(name)
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000883
884 zef_file.seek(zinfo.header_offset, 0)
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000885
886 # Skip the file header:
Martin v. Löwis8c436412008-07-03 12:51:14 +0000887 fheader = zef_file.read(sizeFileHeader)
888 if fheader[0:4] != magicFileHeader:
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000889 raise BadZipfile, "Bad magic number for file header"
890
891 fheader = struct.unpack(structFileHeader, fheader)
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000892 fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000893 if fheader[_FH_EXTRA_FIELD_LENGTH]:
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000894 zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000895
896 if fname != zinfo.orig_filename:
897 raise BadZipfile, \
898 'File name in directory "%s" and header "%s" differ.' % (
899 zinfo.orig_filename, fname)
900
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000901 # check for encrypted flag & handle password
902 is_encrypted = zinfo.flag_bits & 0x1
903 zd = None
Martin v. Löwisc6d626e2007-02-13 09:49:38 +0000904 if is_encrypted:
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000905 if not pwd:
906 pwd = self.pwd
907 if not pwd:
908 raise RuntimeError, "File %s is encrypted, " \
909 "password required for extraction" % name
910
Martin v. Löwisc6d626e2007-02-13 09:49:38 +0000911 zd = _ZipDecrypter(pwd)
912 # The first 12 bytes in the cypher stream is an encryption header
913 # used to strengthen the algorithm. The first 11 bytes are
914 # completely random, while the 12th contains the MSB of the CRC,
Gregory P. Smith0c63fc22008-01-20 01:21:03 +0000915 # or the MSB of the file time depending on the header type
Martin v. Löwisc6d626e2007-02-13 09:49:38 +0000916 # and is used to check the correctness of the password.
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000917 bytes = zef_file.read(12)
Martin v. Löwisc6d626e2007-02-13 09:49:38 +0000918 h = map(zd, bytes[0:12])
Gregory P. Smith0c63fc22008-01-20 01:21:03 +0000919 if zinfo.flag_bits & 0x8:
920 # compare against the file type from extended local headers
921 check_byte = (zinfo._raw_time >> 8) & 0xff
922 else:
923 # compare against the CRC otherwise
924 check_byte = (zinfo.CRC >> 24) & 0xff
925 if ord(h[11]) != check_byte:
926 raise RuntimeError("Bad password for file", name)
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000927
928 # build and return a ZipExtFile
929 if zd is None:
930 zef = ZipExtFile(zef_file, zinfo)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000931 else:
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000932 zef = ZipExtFile(zef_file, zinfo, zd)
933
934 # set universal newlines on ZipExtFile if necessary
935 if "U" in mode:
936 zef.set_univ_newlines(True)
937 return zef
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000938
Georg Brandl62416bc2008-01-07 18:47:44 +0000939 def extract(self, member, path=None, pwd=None):
940 """Extract a member from the archive to the current working directory,
941 using its full name. Its file information is extracted as accurately
942 as possible. `member' may be a filename or a ZipInfo object. You can
943 specify a different directory using `path'.
944 """
945 if not isinstance(member, ZipInfo):
946 member = self.getinfo(member)
947
948 if path is None:
949 path = os.getcwd()
950
951 return self._extract_member(member, path, pwd)
952
953 def extractall(self, path=None, members=None, pwd=None):
954 """Extract all members from the archive to the current working
955 directory. `path' specifies a different directory to extract to.
956 `members' is optional and must be a subset of the list returned
957 by namelist().
958 """
959 if members is None:
960 members = self.namelist()
961
962 for zipinfo in members:
963 self.extract(zipinfo, path, pwd)
964
965 def _extract_member(self, member, targetpath, pwd):
966 """Extract the ZipInfo object 'member' to a physical
967 file on the path targetpath.
968 """
969 # build the destination pathname, replacing
970 # forward slashes to platform specific separators.
971 if targetpath[-1:] == "/":
972 targetpath = targetpath[:-1]
973
974 # don't include leading "/" from file name if present
975 if os.path.isabs(member.filename):
976 targetpath = os.path.join(targetpath, member.filename[1:])
977 else:
978 targetpath = os.path.join(targetpath, member.filename)
979
980 targetpath = os.path.normpath(targetpath)
981
982 # Create all upper directories if necessary.
983 upperdirs = os.path.dirname(targetpath)
984 if upperdirs and not os.path.exists(upperdirs):
985 os.makedirs(upperdirs)
986
Georg Brandl112aa502008-05-20 08:25:48 +0000987 source = self.open(member, pwd=pwd)
Georg Brandl62416bc2008-01-07 18:47:44 +0000988 target = file(targetpath, "wb")
989 shutil.copyfileobj(source, target)
990 source.close()
991 target.close()
992
993 return targetpath
994
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000995 def _writecheck(self, zinfo):
Fred Drake484d7352000-10-02 21:14:52 +0000996 """Check for errors before writing a file to the archive."""
Raymond Hettinger54f02222002-06-01 14:18:47 +0000997 if zinfo.filename in self.NameToInfo:
Tim Peterse1190062001-01-15 03:34:38 +0000998 if self.debug: # Warning for duplicate names
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000999 print "Duplicate name:", zinfo.filename
1000 if self.mode not in ("w", "a"):
1001 raise RuntimeError, 'write() requires mode "w" or "a"'
1002 if not self.fp:
1003 raise RuntimeError, \
Fred Drake5db246d2000-09-29 20:44:48 +00001004 "Attempt to write ZIP archive that was already closed"
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001005 if zinfo.compress_type == ZIP_DEFLATED and not zlib:
1006 raise RuntimeError, \
Fred Drake5db246d2000-09-29 20:44:48 +00001007 "Compression requires the (missing) zlib module"
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001008 if zinfo.compress_type not in (ZIP_STORED, ZIP_DEFLATED):
1009 raise RuntimeError, \
Fred Drake5db246d2000-09-29 20:44:48 +00001010 "That compression method is not supported"
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001011 if zinfo.file_size > ZIP64_LIMIT:
1012 if not self._allowZip64:
1013 raise LargeZipFile("Filesize would require ZIP64 extensions")
1014 if zinfo.header_offset > ZIP64_LIMIT:
1015 if not self._allowZip64:
1016 raise LargeZipFile("Zipfile size would require ZIP64 extensions")
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001017
1018 def write(self, filename, arcname=None, compress_type=None):
Fred Drake484d7352000-10-02 21:14:52 +00001019 """Put the bytes from filename into the archive under the name
1020 arcname."""
Georg Brandl4b3ab6f2007-07-12 09:59:22 +00001021 if not self.fp:
1022 raise RuntimeError(
1023 "Attempt to write to ZIP archive that was already closed")
1024
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001025 st = os.stat(filename)
Raymond Hettinger32200ae2002-06-01 19:51:15 +00001026 mtime = time.localtime(st.st_mtime)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001027 date_time = mtime[0:6]
1028 # Create ZipInfo instance to store file information
1029 if arcname is None:
Georg Brandl8f7c54e2006-02-20 08:40:38 +00001030 arcname = filename
1031 arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
1032 while arcname[0] in (os.sep, os.altsep):
1033 arcname = arcname[1:]
1034 zinfo = ZipInfo(arcname, date_time)
Andrew M. Kuchling55430212004-07-10 15:40:29 +00001035 zinfo.external_attr = (st[0] & 0xFFFF) << 16L # Unix attributes
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001036 if compress_type is None:
Tim Peterse1190062001-01-15 03:34:38 +00001037 zinfo.compress_type = self.compression
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001038 else:
Tim Peterse1190062001-01-15 03:34:38 +00001039 zinfo.compress_type = compress_type
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001040
1041 zinfo.file_size = st.st_size
Finn Bock03a3bb82001-09-05 18:40:33 +00001042 zinfo.flag_bits = 0x00
Tim Peterse1190062001-01-15 03:34:38 +00001043 zinfo.header_offset = self.fp.tell() # Start of header bytes
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001044
1045 self._writecheck(zinfo)
1046 self._didModify = True
1047 fp = open(filename, "rb")
Finn Bock03a3bb82001-09-05 18:40:33 +00001048 # Must overwrite CRC and sizes with correct data later
1049 zinfo.CRC = CRC = 0
1050 zinfo.compress_size = compress_size = 0
1051 zinfo.file_size = file_size = 0
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001052 self.fp.write(zinfo.FileHeader())
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001053 if zinfo.compress_type == ZIP_DEFLATED:
1054 cmpr = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
1055 zlib.DEFLATED, -15)
1056 else:
1057 cmpr = None
1058 while 1:
1059 buf = fp.read(1024 * 8)
1060 if not buf:
1061 break
1062 file_size = file_size + len(buf)
Gregory P. Smithbf02e3b2008-03-19 03:14:41 +00001063 CRC = crc32(buf, CRC) & 0xffffffff
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001064 if cmpr:
1065 buf = cmpr.compress(buf)
1066 compress_size = compress_size + len(buf)
1067 self.fp.write(buf)
1068 fp.close()
1069 if cmpr:
1070 buf = cmpr.flush()
1071 compress_size = compress_size + len(buf)
1072 self.fp.write(buf)
1073 zinfo.compress_size = compress_size
1074 else:
1075 zinfo.compress_size = file_size
1076 zinfo.CRC = CRC
1077 zinfo.file_size = file_size
Finn Bock03a3bb82001-09-05 18:40:33 +00001078 # Seek backwards and write CRC and file sizes
Tim Petersb64bec32001-09-18 02:26:39 +00001079 position = self.fp.tell() # Preserve current position in file
Finn Bock03a3bb82001-09-05 18:40:33 +00001080 self.fp.seek(zinfo.header_offset + 14, 0)
Gregory P. Smithbf02e3b2008-03-19 03:14:41 +00001081 self.fp.write(struct.pack("<LLL", zinfo.CRC, zinfo.compress_size,
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001082 zinfo.file_size))
Finn Bock03a3bb82001-09-05 18:40:33 +00001083 self.fp.seek(position, 0)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001084 self.filelist.append(zinfo)
1085 self.NameToInfo[zinfo.filename] = zinfo
1086
Just van Rossumb083cb32002-12-12 12:23:32 +00001087 def writestr(self, zinfo_or_arcname, bytes):
Fred Drake484d7352000-10-02 21:14:52 +00001088 """Write a file into the archive. The contents is the string
Just van Rossumb083cb32002-12-12 12:23:32 +00001089 'bytes'. 'zinfo_or_arcname' is either a ZipInfo instance or
1090 the name of the file in the archive."""
1091 if not isinstance(zinfo_or_arcname, ZipInfo):
1092 zinfo = ZipInfo(filename=zinfo_or_arcname,
Raymond Hettinger351e1a32008-01-14 22:58:05 +00001093 date_time=time.localtime(time.time())[:6])
Just van Rossumb083cb32002-12-12 12:23:32 +00001094 zinfo.compress_type = self.compression
1095 else:
1096 zinfo = zinfo_or_arcname
Georg Brandl4b3ab6f2007-07-12 09:59:22 +00001097
1098 if not self.fp:
1099 raise RuntimeError(
1100 "Attempt to write to ZIP archive that was already closed")
1101
Tim Peterse1190062001-01-15 03:34:38 +00001102 zinfo.file_size = len(bytes) # Uncompressed size
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001103 zinfo.header_offset = self.fp.tell() # Start of header bytes
1104 self._writecheck(zinfo)
1105 self._didModify = True
Gregory P. Smithbf02e3b2008-03-19 03:14:41 +00001106 zinfo.CRC = crc32(bytes) & 0xffffffff # CRC-32 checksum
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001107 if zinfo.compress_type == ZIP_DEFLATED:
1108 co = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
1109 zlib.DEFLATED, -15)
1110 bytes = co.compress(bytes) + co.flush()
Tim Peterse1190062001-01-15 03:34:38 +00001111 zinfo.compress_size = len(bytes) # Compressed size
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001112 else:
1113 zinfo.compress_size = zinfo.file_size
Tim Peterse1190062001-01-15 03:34:38 +00001114 zinfo.header_offset = self.fp.tell() # Start of header bytes
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001115 self.fp.write(zinfo.FileHeader())
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001116 self.fp.write(bytes)
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001117 self.fp.flush()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001118 if zinfo.flag_bits & 0x08:
Tim Peterse1190062001-01-15 03:34:38 +00001119 # Write CRC and file sizes after the file data
Brett Cannonff450f72004-07-10 19:09:20 +00001120 self.fp.write(struct.pack("<lLL", zinfo.CRC, zinfo.compress_size,
Tim Peterse1190062001-01-15 03:34:38 +00001121 zinfo.file_size))
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001122 self.filelist.append(zinfo)
1123 self.NameToInfo[zinfo.filename] = zinfo
1124
1125 def __del__(self):
Fred Drake484d7352000-10-02 21:14:52 +00001126 """Call the "close()" method in case the user forgot."""
Tim Petersd15f8bb2001-11-28 23:16:40 +00001127 self.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001128
1129 def close(self):
Fred Drake484d7352000-10-02 21:14:52 +00001130 """Close the file, and for mode "w" and "a" write the ending
1131 records."""
Tim Petersd15f8bb2001-11-28 23:16:40 +00001132 if self.fp is None:
1133 return
Tim Petersa608bb22006-06-15 18:06:29 +00001134
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001135 if self.mode in ("w", "a") and self._didModify: # write ending records
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001136 count = 0
1137 pos1 = self.fp.tell()
Tim Peterse1190062001-01-15 03:34:38 +00001138 for zinfo in self.filelist: # write central directory
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001139 count = count + 1
1140 dt = zinfo.date_time
1141 dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
Tim Peters3caca232001-12-06 06:23:26 +00001142 dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001143 extra = []
1144 if zinfo.file_size > ZIP64_LIMIT \
1145 or zinfo.compress_size > ZIP64_LIMIT:
1146 extra.append(zinfo.file_size)
1147 extra.append(zinfo.compress_size)
Martin v. Löwis8c436412008-07-03 12:51:14 +00001148 file_size = 0xffffffff
1149 compress_size = 0xffffffff
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001150 else:
1151 file_size = zinfo.file_size
1152 compress_size = zinfo.compress_size
1153
1154 if zinfo.header_offset > ZIP64_LIMIT:
1155 extra.append(zinfo.header_offset)
Martin v. Löwis8c436412008-07-03 12:51:14 +00001156 header_offset = 0xffffffffL
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001157 else:
1158 header_offset = zinfo.header_offset
1159
1160 extra_data = zinfo.extra
1161 if extra:
1162 # Append a ZIP64 field to the extra's
1163 extra_data = struct.pack(
Gregory P. Smithb89a0962008-03-19 01:46:10 +00001164 '<HH' + 'Q'*len(extra),
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001165 1, 8*len(extra), *extra) + extra_data
Tim Petersa608bb22006-06-15 18:06:29 +00001166
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001167 extract_version = max(45, zinfo.extract_version)
1168 create_version = max(45, zinfo.create_version)
1169 else:
1170 extract_version = zinfo.extract_version
1171 create_version = zinfo.create_version
1172
Gregory P. Smithbf02e3b2008-03-19 03:14:41 +00001173 try:
Martin v. Löwis471617d2008-05-05 17:16:58 +00001174 filename, flag_bits = zinfo._encodeFilenameFlags()
Gregory P. Smithbf02e3b2008-03-19 03:14:41 +00001175 centdir = struct.pack(structCentralDir,
Martin v. Löwis8c436412008-07-03 12:51:14 +00001176 magicCentralDir, create_version,
Gregory P. Smithbf02e3b2008-03-19 03:14:41 +00001177 zinfo.create_system, extract_version, zinfo.reserved,
Martin v. Löwis471617d2008-05-05 17:16:58 +00001178 flag_bits, zinfo.compress_type, dostime, dosdate,
Gregory P. Smithbf02e3b2008-03-19 03:14:41 +00001179 zinfo.CRC, compress_size, file_size,
Martin v. Löwis471617d2008-05-05 17:16:58 +00001180 len(filename), len(extra_data), len(zinfo.comment),
Gregory P. Smithbf02e3b2008-03-19 03:14:41 +00001181 0, zinfo.internal_attr, zinfo.external_attr,
1182 header_offset)
1183 except DeprecationWarning:
1184 print >>sys.stderr, (structCentralDir,
1185 stringCentralDir, create_version,
1186 zinfo.create_system, extract_version, zinfo.reserved,
1187 zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
1188 zinfo.CRC, compress_size, file_size,
1189 len(zinfo.filename), len(extra_data), len(zinfo.comment),
1190 0, zinfo.internal_attr, zinfo.external_attr,
1191 header_offset)
1192 raise
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001193 self.fp.write(centdir)
Martin v. Löwis471617d2008-05-05 17:16:58 +00001194 self.fp.write(filename)
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001195 self.fp.write(extra_data)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001196 self.fp.write(zinfo.comment)
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001197
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001198 pos2 = self.fp.tell()
1199 # Write end-of-zip-archive record
Martin v. Löwis8c436412008-07-03 12:51:14 +00001200 centDirOffset = pos1
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001201 if pos1 > ZIP64_LIMIT:
1202 # Need to write the ZIP64 end-of-archive records
1203 zip64endrec = struct.pack(
Martin v. Löwis8c436412008-07-03 12:51:14 +00001204 structEndCentDir64, magicEndCentDir64,
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001205 44, 45, 45, 0, 0, count, count, pos2 - pos1, pos1)
1206 self.fp.write(zip64endrec)
1207
1208 zip64locrec = struct.pack(
Martin v. Löwis8c436412008-07-03 12:51:14 +00001209 structEndCentDir64Locator,
1210 magicEndCentDir64Locator, 0, pos2, 1)
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001211 self.fp.write(zip64locrec)
Martin v. Löwis8c436412008-07-03 12:51:14 +00001212 centDirOffset = 0xFFFFFFFF
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001213
Martin v. Löwis8c436412008-07-03 12:51:14 +00001214 # check for valid comment length
1215 if len(self.comment) >= ZIP_MAX_COMMENT:
1216 if self.debug > 0:
1217 msg = 'Archive comment is too long; truncating to %d bytes' \
1218 % ZIP_MAX_COMMENT
1219 self.comment = self.comment[:ZIP_MAX_COMMENT]
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001220
Martin v. Löwis8c436412008-07-03 12:51:14 +00001221 endrec = struct.pack(structEndCentDir, magicEndCentDir,
1222 0, 0, count % ZIP_FILECOUNT_LIMIT,
1223 count % ZIP_FILECOUNT_LIMIT, pos2 - pos1,
1224 centDirOffset, len(self.comment))
1225 self.fp.write(endrec)
1226 self.fp.write(self.comment)
Guido van Rossumf85af612001-04-14 16:45:14 +00001227 self.fp.flush()
Martin v. Löwis8c436412008-07-03 12:51:14 +00001228
Fred Drake3d9091e2001-03-26 15:49:24 +00001229 if not self._filePassed:
1230 self.fp.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001231 self.fp = None
1232
1233
1234class PyZipFile(ZipFile):
Fred Drake484d7352000-10-02 21:14:52 +00001235 """Class to create ZIP archives with Python library files and packages."""
1236
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001237 def writepy(self, pathname, basename = ""):
1238 """Add all files from "pathname" to the ZIP archive.
1239
Fred Drake484d7352000-10-02 21:14:52 +00001240 If pathname is a package directory, search the directory and
1241 all package subdirectories recursively for all *.py and enter
1242 the modules into the archive. If pathname is a plain
1243 directory, listdir *.py and enter all modules. Else, pathname
1244 must be a Python *.py file and the module will be put into the
1245 archive. Added modules are always module.pyo or module.pyc.
1246 This method will compile the module.py into module.pyc if
1247 necessary.
1248 """
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001249 dir, name = os.path.split(pathname)
1250 if os.path.isdir(pathname):
1251 initname = os.path.join(pathname, "__init__.py")
1252 if os.path.isfile(initname):
1253 # This is a package directory, add it
1254 if basename:
1255 basename = "%s/%s" % (basename, name)
1256 else:
1257 basename = name
1258 if self.debug:
1259 print "Adding package in", pathname, "as", basename
1260 fname, arcname = self._get_codename(initname[0:-3], basename)
1261 if self.debug:
1262 print "Adding", arcname
1263 self.write(fname, arcname)
1264 dirlist = os.listdir(pathname)
1265 dirlist.remove("__init__.py")
1266 # Add all *.py files and package subdirectories
1267 for filename in dirlist:
1268 path = os.path.join(pathname, filename)
1269 root, ext = os.path.splitext(filename)
1270 if os.path.isdir(path):
1271 if os.path.isfile(os.path.join(path, "__init__.py")):
1272 # This is a package directory, add it
1273 self.writepy(path, basename) # Recursive call
1274 elif ext == ".py":
1275 fname, arcname = self._get_codename(path[0:-3],
1276 basename)
1277 if self.debug:
1278 print "Adding", arcname
1279 self.write(fname, arcname)
1280 else:
1281 # This is NOT a package directory, add its files at top level
1282 if self.debug:
1283 print "Adding files from directory", pathname
1284 for filename in os.listdir(pathname):
1285 path = os.path.join(pathname, filename)
1286 root, ext = os.path.splitext(filename)
1287 if ext == ".py":
1288 fname, arcname = self._get_codename(path[0:-3],
1289 basename)
1290 if self.debug:
1291 print "Adding", arcname
1292 self.write(fname, arcname)
1293 else:
1294 if pathname[-3:] != ".py":
1295 raise RuntimeError, \
Fred Drake5db246d2000-09-29 20:44:48 +00001296 'Files added with writepy() must end with ".py"'
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001297 fname, arcname = self._get_codename(pathname[0:-3], basename)
1298 if self.debug:
1299 print "Adding file", arcname
1300 self.write(fname, arcname)
1301
1302 def _get_codename(self, pathname, basename):
1303 """Return (filename, archivename) for the path.
1304
Fred Drake484d7352000-10-02 21:14:52 +00001305 Given a module name path, return the correct file path and
1306 archive name, compiling if necessary. For example, given
1307 /python/lib/string, return (/python/lib/string.pyc, string).
1308 """
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001309 file_py = pathname + ".py"
1310 file_pyc = pathname + ".pyc"
1311 file_pyo = pathname + ".pyo"
1312 if os.path.isfile(file_pyo) and \
Raymond Hettinger32200ae2002-06-01 19:51:15 +00001313 os.stat(file_pyo).st_mtime >= os.stat(file_py).st_mtime:
Tim Peterse1190062001-01-15 03:34:38 +00001314 fname = file_pyo # Use .pyo file
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001315 elif not os.path.isfile(file_pyc) or \
Raymond Hettinger32200ae2002-06-01 19:51:15 +00001316 os.stat(file_pyc).st_mtime < os.stat(file_py).st_mtime:
Fred Drake484d7352000-10-02 21:14:52 +00001317 import py_compile
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001318 if self.debug:
1319 print "Compiling", file_py
Martin v. Löwis0c6774d2003-01-15 11:51:06 +00001320 try:
1321 py_compile.compile(file_py, file_pyc, None, True)
1322 except py_compile.PyCompileError,err:
1323 print err.msg
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001324 fname = file_pyc
1325 else:
1326 fname = file_pyc
1327 archivename = os.path.split(fname)[1]
1328 if basename:
1329 archivename = "%s/%s" % (basename, archivename)
1330 return (fname, archivename)
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001331
1332
1333def main(args = None):
1334 import textwrap
1335 USAGE=textwrap.dedent("""\
1336 Usage:
1337 zipfile.py -l zipfile.zip # Show listing of a zipfile
1338 zipfile.py -t zipfile.zip # Test if a zipfile is valid
1339 zipfile.py -e zipfile.zip target # Extract zipfile into target dir
1340 zipfile.py -c zipfile.zip src ... # Create zipfile from sources
1341 """)
1342 if args is None:
1343 args = sys.argv[1:]
1344
1345 if not args or args[0] not in ('-l', '-c', '-e', '-t'):
1346 print USAGE
1347 sys.exit(1)
1348
1349 if args[0] == '-l':
1350 if len(args) != 2:
1351 print USAGE
1352 sys.exit(1)
1353 zf = ZipFile(args[1], 'r')
1354 zf.printdir()
1355 zf.close()
1356
1357 elif args[0] == '-t':
1358 if len(args) != 2:
1359 print USAGE
1360 sys.exit(1)
1361 zf = ZipFile(args[1], 'r')
1362 zf.testzip()
1363 print "Done testing"
1364
1365 elif args[0] == '-e':
1366 if len(args) != 3:
1367 print USAGE
1368 sys.exit(1)
1369
1370 zf = ZipFile(args[1], 'r')
1371 out = args[2]
1372 for path in zf.namelist():
Tim Petersa608bb22006-06-15 18:06:29 +00001373 if path.startswith('./'):
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001374 tgt = os.path.join(out, path[2:])
1375 else:
1376 tgt = os.path.join(out, path)
1377
1378 tgtdir = os.path.dirname(tgt)
1379 if not os.path.exists(tgtdir):
1380 os.makedirs(tgtdir)
1381 fp = open(tgt, 'wb')
1382 fp.write(zf.read(path))
1383 fp.close()
1384 zf.close()
1385
1386 elif args[0] == '-c':
1387 if len(args) < 3:
1388 print USAGE
1389 sys.exit(1)
1390
1391 def addToZip(zf, path, zippath):
1392 if os.path.isfile(path):
1393 zf.write(path, zippath, ZIP_DEFLATED)
1394 elif os.path.isdir(path):
1395 for nm in os.listdir(path):
Tim Petersa608bb22006-06-15 18:06:29 +00001396 addToZip(zf,
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001397 os.path.join(path, nm), os.path.join(zippath, nm))
Tim Petersa608bb22006-06-15 18:06:29 +00001398 # else: ignore
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001399
1400 zf = ZipFile(args[1], 'w', allowZip64=True)
1401 for src in args[2:]:
1402 addToZip(zf, src, os.path.basename(src))
1403
1404 zf.close()
1405
1406if __name__ == "__main__":
1407 main()