blob: b812a82c54b2c83713fbe9a29ded6efa90b7a863 [file] [log] [blame]
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001"""
2Read and write ZIP files.
3"""
Georg Brandl62416bc2008-01-07 18:47:44 +00004import struct, os, time, sys, shutil
Ronald Oussoren143cefb2006-06-15 08:14:18 +00005import binascii, cStringIO
Guido van Rossum32abe6f2000-03-31 17:30:02 +00006
7try:
Tim Peterse1190062001-01-15 03:34:38 +00008 import zlib # We may need its compression method
Gregory P. Smithb89a0962008-03-19 01:46:10 +00009 crc32 = zlib.crc32
Guido van Rossum9c673f32001-04-10 15:37:12 +000010except ImportError:
Guido van Rossum32abe6f2000-03-31 17:30:02 +000011 zlib = None
Gregory P. Smithb89a0962008-03-19 01:46:10 +000012 crc32 = binascii.crc32
Guido van Rossum32abe6f2000-03-31 17:30:02 +000013
Skip Montanaro40fc1602001-03-01 04:27:19 +000014__all__ = ["BadZipfile", "error", "ZIP_STORED", "ZIP_DEFLATED", "is_zipfile",
Ronald Oussoren143cefb2006-06-15 08:14:18 +000015 "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile" ]
Skip Montanaro40fc1602001-03-01 04:27:19 +000016
class BadZipfile(Exception):
    """Raised when a file cannot be processed as a ZIP archive."""


class LargeZipFile(Exception):
    """
    Raised when writing a zipfile that requires ZIP64 extensions while
    those extensions are disabled.
    """


error = BadZipfile      # The exception raised by this module
Guido van Rossum32abe6f2000-03-31 17:30:02 +000028
# Sizes above this limit are written using the ZIP64 extension.
ZIP64_LIMIT = (1 << 31) - 1

# constants for Zip file compression methods
ZIP_STORED = 0
ZIP_DEFLATED = 8
# Other ZIP compression methods not supported

# Here are some struct module formats for reading headers.
# The item counts below are the number of values struct.unpack() returns.
structEndArchive = "<4s4H2LH"     # 8 items, end of archive, 22 bytes
stringEndArchive = "PK\005\006"   # magic number for end of archive record
structCentralDir = "<4s4B4HLLL5HLL"# 19 items, central directory, 46 bytes
stringCentralDir = "PK\001\002"   # magic number for central directory
structFileHeader = "<4s2B4HLLL2H" # 12 items, file header record, 30 bytes
stringFileHeader = "PK\003\004"   # magic number for file header
structEndArchive64Locator = "<4sLQL" # 4 items, locate Zip64 header, 20 bytes
stringEndArchive64Locator = "PK\x06\x07" # magic token for locator header
structEndArchive64 = "<4sQHHLLQQQQ" # 10 items, end of archive (Zip64), 56 bytes
stringEndArchive64 = "PK\x06\x06" # magic token for Zip64 header


# indexes of entries in the central directory structure
_CD_SIGNATURE = 0
_CD_CREATE_VERSION = 1
_CD_CREATE_SYSTEM = 2
_CD_EXTRACT_VERSION = 3
_CD_EXTRACT_SYSTEM = 4          # is this meaningful?
_CD_FLAG_BITS = 5
_CD_COMPRESS_TYPE = 6
_CD_TIME = 7
_CD_DATE = 8
_CD_CRC = 9
_CD_COMPRESSED_SIZE = 10
_CD_UNCOMPRESSED_SIZE = 11
_CD_FILENAME_LENGTH = 12
_CD_EXTRA_FIELD_LENGTH = 13
_CD_COMMENT_LENGTH = 14
_CD_DISK_NUMBER_START = 15
_CD_INTERNAL_FILE_ATTRIBUTES = 16
_CD_EXTERNAL_FILE_ATTRIBUTES = 17
_CD_LOCAL_HEADER_OFFSET = 18

# indexes of entries in the local file header structure
_FH_SIGNATURE = 0
_FH_EXTRACT_VERSION = 1
_FH_EXTRACT_SYSTEM = 2          # is this meaningful?
_FH_GENERAL_PURPOSE_FLAG_BITS = 3
_FH_COMPRESSION_METHOD = 4
_FH_LAST_MOD_TIME = 5
_FH_LAST_MOD_DATE = 6
_FH_CRC = 7
_FH_COMPRESSED_SIZE = 8
_FH_UNCOMPRESSED_SIZE = 9
_FH_FILENAME_LENGTH = 10
_FH_EXTRA_FIELD_LENGTH = 11
83
def is_zipfile(filename):
    """Quickly see if file is a ZIP file by checking the magic number.

    Returns True when _EndRecData() finds an "End of Central Directory"
    record in the file named *filename*.  Returns False when no record is
    found, or when opening or reading the file raises IOError.
    """
    try:
        fpin = open(filename, "rb")
        try:
            endrec = _EndRecData(fpin)
        finally:
            # Always release the handle; previously it leaked whenever
            # _EndRecData() raised.
            fpin.close()
    except IOError:
        return False
    # A non-empty record means the file has the correct magic number.
    return bool(endrec)
Guido van Rossum32abe6f2000-03-31 17:30:02 +000095
def _EndRecData64(fpin, offset, endrec):
    """
    Read the ZIP64 end-of-archive records and use that to update endrec

    fpin   -- seekable file object positioned anywhere in the archive
    offset -- position of the classic end-of-archive record, relative to
              the end of the file (it is used with whence=2)
    endrec -- list built by _EndRecData(); items 1-6 are overwritten in
              place with the ZIP64 values when a ZIP64 record is found

    Returns endrec.  It is returned unchanged when the file holds no ZIP64
    locator/record in front of the classic record, including when the file
    is too short to contain them.  Raises BadZipfile for archives that
    span multiple disks.
    """
    locatorSize = struct.calcsize(structEndArchive64Locator)
    try:
        fpin.seek(offset - locatorSize, 2)
    except IOError:
        # The seek landed before the start of the file: there is no room
        # for a ZIP64 locator, so keep the record we were given.
        return endrec

    data = fpin.read(locatorSize)
    if len(data) != locatorSize:
        return endrec
    sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, data)
    if sig != stringEndArchive64Locator:
        return endrec

    if diskno != 0 or disks != 1:
        raise BadZipfile("zipfiles that span multiple disks are not supported")

    # Assume no 'zip64 extensible data'
    endArchiveSize = struct.calcsize(structEndArchive64)
    try:
        fpin.seek(offset - locatorSize - endArchiveSize, 2)
    except IOError:
        return endrec
    data = fpin.read(endArchiveSize)
    if len(data) != endArchiveSize:
        return endrec
    sig, sz, create_version, read_version, disk_num, disk_dir, \
            dircount, dircount2, dirsize, diroffset = \
            struct.unpack(structEndArchive64, data)
    if sig != stringEndArchive64:
        return endrec

    # Update the original endrec using data from the ZIP64 record
    endrec[1] = disk_num
    endrec[2] = disk_dir
    endrec[3] = dircount
    endrec[4] = dircount2
    endrec[5] = dirsize
    endrec[6] = diroffset
    return endrec
128
129
def _EndRecData(fpin):
    """Return data from the "End of Central Directory" record, or None.

    The data is a list of the eight items in the ZIP "End of central dir"
    record, followed by the archive comment (index 8) and the file seek
    offset of this record (index 9).

    None is returned when no valid record is found, including when the
    file is too short to hold one."""
    recSize = struct.calcsize(structEndArchive)     # 22 bytes

    # Determine the file size first so that a file shorter than one record
    # yields None instead of a failing negative seek.
    fpin.seek(0, 2)
    filesize = fpin.tell()
    if filesize < recSize:
        return None

    # Fast path: assume no archive comment, so the record is the last
    # recSize bytes of the file.
    fpin.seek(-recSize, 2)
    data = fpin.read()
    if data[0:4] == stringEndArchive and data[-2:] == "\000\000":
        endrec = list(struct.unpack(structEndArchive, data))
        endrec.append("")                   # Append the archive comment
        endrec.append(filesize - recSize)   # Append the record start offset
        if endrec[-4] == 0xffffffff:
            return _EndRecData64(fpin, -recSize, endrec)
        return endrec

    # Slow path: the comment is appended to the ZIP file and has a 16 bit
    # length, so the record may start up to 64K + recSize bytes before the
    # end of the file.  Search that whole window for the signature; the
    # signature must not appear in the comment.
    searchStart = max(filesize - (1 << 16) - recSize, 0)
    fpin.seek(searchStart, 0)
    data = fpin.read()
    start = data.rfind(stringEndArchive)
    if start < 0:
        return None                         # Error, return None

    recData = data[start:start + recSize]
    if len(recData) != recSize:
        # Signature bytes found too close to the end of the file to be
        # followed by a complete record.
        return None
    endrec = list(struct.unpack(structEndArchive, recData))
    comment = data[start + recSize:]
    if endrec[7] != len(comment):           # Comment length must check out
        return None

    # Append the archive comment and start offset
    endrec.append(comment)
    endrec.append(searchStart + start)
    if endrec[-4] == 0xffffffff:
        # _EndRecData64 expects the record offset relative to end of file
        return _EndRecData64(fpin, searchStart + start - filesize, endrec)
    return endrec
167
Fred Drake484d7352000-10-02 21:14:52 +0000168
class ZipInfo (object):
    """Class with attributes describing each file in the ZIP archive."""

    __slots__ = (
            'orig_filename',
            'filename',
            'date_time',
            'compress_type',
            'comment',
            'extra',
            'create_system',
            'create_version',
            'extract_version',
            'reserved',
            'flag_bits',
            'volume',
            'internal_attr',
            'external_attr',
            'header_offset',
            'CRC',
            'compress_size',
            'file_size',
            '_raw_time',
        )

    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
        """Create a ZipInfo with default attribute values.

        filename  -- name of the member; it is cut at the first null byte
                     and os.sep is replaced by "/" for the stored name
        date_time -- (year, month, day, hour, min, sec) tuple
        """
        self.orig_filename = filename   # Original file name in archive

        # Terminate the file name at the first null byte.  Null bytes in file
        # names are used as tricks by viruses in archives.
        null_byte = filename.find(chr(0))
        if null_byte >= 0:
            filename = filename[0:null_byte]
        # This is used to ensure paths in generated ZIP files always use
        # forward slashes as the directory separator, as required by the
        # ZIP format specification.
        if os.sep != "/" and os.sep in filename:
            filename = filename.replace(os.sep, "/")

        self.filename = filename        # Normalized file name
        self.date_time = date_time      # year, month, day, hour, min, sec
        # Standard values:
        self.compress_type = ZIP_STORED # Type of compression for the file
        self.comment = ""               # Comment for each file
        self.extra = ""                 # ZIP extra data
        if sys.platform == 'win32':
            self.create_system = 0          # System which created ZIP archive
        else:
            # Assume everything else is unix-y
            self.create_system = 3          # System which created ZIP archive
        self.create_version = 20        # Version which created ZIP archive
        self.extract_version = 20       # Version needed to extract archive
        self.reserved = 0               # Must be zero
        self.flag_bits = 0              # ZIP flag bits
        self.volume = 0                 # Volume number of file header
        self.internal_attr = 0          # Internal attributes
        self.external_attr = 0          # External file attributes
        # Other attributes are set by class ZipFile:
        # header_offset         Byte offset to the file header
        # CRC                   CRC-32 of the uncompressed file
        # compress_size         Size of the compressed file
        # file_size             Size of the uncompressed file

    def FileHeader(self):
        """Return the per-file header as a string.

        When either size exceeds ZIP64_LIMIT a ZIP64 extra record is
        appended to the extra data, the 32-bit size fields are set to
        0xffffffff, and extract_version/create_version are raised to at
        least 45 on this instance.
        """
        dt = self.date_time
        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
        if self.flag_bits & 0x08:
            # Set these to zero because we write them after the file data
            CRC = compress_size = file_size = 0
        else:
            CRC = self.CRC
            compress_size = self.compress_size
            file_size = self.file_size

        extra = self.extra

        if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
            # File is larger than what fits into a 4 byte integer,
            # fall back to the ZIP64 extension
            fmt = '<HHQQ'
            extra = extra + struct.pack(fmt,
                    1, struct.calcsize(fmt)-4, file_size, compress_size)
            # 0xffffffff marks "real value is in the ZIP64 extra record"
            file_size = 0xffffffff
            compress_size = 0xffffffff
            self.extract_version = max(45, self.extract_version)
            # Raise create_version from its own value; deriving it from
            # extract_version could lower a higher create_version.
            self.create_version = max(45, self.create_version)

        filename, flag_bits = self._encodeFilenameFlags()
        header = struct.pack(structFileHeader, stringFileHeader,
                 self.extract_version, self.reserved, flag_bits,
                 self.compress_type, dostime, dosdate, CRC,
                 compress_size, file_size,
                 len(filename), len(extra))
        return header + filename + extra

    def _encodeFilenameFlags(self):
        """Return (encoded filename, flag bits) for writing a header.

        A unicode filename is encoded as ASCII when possible; otherwise it
        is encoded as UTF-8 and bit 0x800 is added to the returned flags.
        A non-unicode filename is returned untouched.
        """
        if isinstance(self.filename, unicode):
            try:
                return self.filename.encode('ascii'), self.flag_bits
            except UnicodeEncodeError:
                return self.filename.encode('utf-8'), self.flag_bits | 0x800
        else:
            return self.filename, self.flag_bits

    def _decodeFilename(self):
        """Return the filename, decoded from UTF-8 when flag 0x800 is set."""
        if self.flag_bits & 0x800:
            return self.filename.decode('utf-8')
        else:
            return self.filename

    def _decodeExtra(self):
        """Decode the extra field, applying any ZIP64 record (type 1).

        For each of file_size, compress_size and header_offset that holds
        a "see ZIP64 record" marker, the real value is taken, in that
        order, from the 64-bit values in the record.  Records of other
        types are skipped.  Raises RuntimeError when a ZIP64 record has an
        unexpected length.
        """
        extra = self.extra
        unpack = struct.unpack
        # Stop once fewer than 4 bytes remain: that cannot hold a
        # (type, length) header, and unpacking it would raise struct.error.
        while len(extra) >= 4:
            tp, ln = unpack('<HH', extra[:4])
            if tp == 1:
                if ln >= 24:
                    counts = unpack('<QQQ', extra[4:28])
                elif ln == 16:
                    counts = unpack('<QQ', extra[4:20])
                elif ln == 8:
                    counts = unpack('<Q', extra[4:12])
                elif ln == 0:
                    counts = ()
                else:
                    raise RuntimeError("Corrupt extra field %s" % (ln,))

                idx = 0

                # ZIP64 extension (large files and/or large archives)
                # XXX Is this correct? won't this exclude 2**32-1 byte files?
                if self.file_size in (0xffffffffffffffff, 0xffffffff):
                    self.file_size = counts[idx]
                    idx += 1

                if self.compress_size == -1 or self.compress_size == 0xFFFFFFFF:
                    self.compress_size = counts[idx]
                    idx += 1

                if self.header_offset == -1 or self.header_offset == 0xffffffff:
                    self.header_offset = counts[idx]
                    idx += 1

            extra = extra[ln+4:]
316 extra = extra[ln+4:]
Tim Petersa608bb22006-06-15 18:06:29 +0000317
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000318
class _ZipDecrypter:
    """Class to handle decryption of files stored within a ZIP archive.

    ZIP supports a password-based form of encryption. Even though known
    plaintext attacks have been found against it, it is still useful
    to be able to get data out of such a file.

    Usage:
        zd = _ZipDecrypter(mypwd)
        plain_char = zd(cypher_char)
        plain_text = map(zd, cypher_text)
    """

    def _GenerateCRCTable():
        """Generate a CRC-32 table.

        ZIP encryption uses the CRC32 one-byte primitive for scrambling some
        internal keys. We noticed that a direct implementation is faster than
        relying on binascii.crc32().
        """
        poly = 0xedb88320
        table = []
        for byte in range(256):
            crc = byte
            for _bit in range(8):
                shifted = (crc >> 1) & 0x7FFFFFFF
                if crc & 1:
                    crc = shifted ^ poly
                else:
                    crc = shifted
            table.append(crc)
        return table
    crctable = _GenerateCRCTable()

    def _crc32(self, ch, crc):
        """Compute the CRC32 primitive on one byte."""
        index = (crc ^ ord(ch)) & 0xff
        return ((crc >> 8) & 0xffffff) ^ self.crctable[index]

    def __init__(self, pwd):
        """Set the three keys to their start values and mix in each
        character of the password *pwd*."""
        self.key0 = 0x12345678
        self.key1 = 0x23456789
        self.key2 = 0x34567890
        for ch in pwd:
            self._UpdateKeys(ch)

    def _UpdateKeys(self, c):
        """Advance the three keys using the single character *c*."""
        mask32 = 0xFFFFFFFF
        self.key0 = self._crc32(c, self.key0)
        key1 = (self.key1 + (self.key0 & 0xFF)) & mask32
        self.key1 = (key1 * 0x08088405 + 1) & mask32
        top_byte = chr((self.key1 >> 24) & 0xFF)
        self.key2 = self._crc32(top_byte, self.key2)

    def __call__(self, c):
        """Decrypt a single character."""
        k = self.key2 | 2
        stream_byte = ((k * (k ^ 1)) >> 8) & 255
        plain = chr(ord(c) ^ stream_byte)
        # The keys are updated with the decrypted character.
        self._UpdateKeys(plain)
        return plain
377
class ZipExtFile:
    """File-like object for reading an archive member.
       Is returned by ZipFile.open().
    """

    def __init__(self, fileobj, zipinfo, decrypt=None):
        """Wrap *fileobj* for reading one archive member.

        fileobj -- object whose read() supplies the member's stored bytes
        zipinfo -- supplies compress_type, compress_size and filename
        decrypt -- optional callable applied to each stored character
        """
        self.fileobj = fileobj
        self.decrypter = decrypt
        self.bytes_read = 0
        self.rawbuffer = ''
        self.readbuffer = ''
        self.linebuffer = ''
        self.eof = False
        self.univ_newlines = False
        self.nlSeps = ("\n", )
        self.lastdiscard = ''

        self.compress_type = zipinfo.compress_type
        self.compress_size = zipinfo.compress_size

        self.closed = False
        self.mode = "r"
        self.name = zipinfo.filename

        # read from compressed files in 64k blocks
        self.compreadsize = 64*1024
        if self.compress_type == ZIP_DEFLATED:
            self.dc = zlib.decompressobj(-15)

    def set_univ_newlines(self, univ_newlines):
        """Enable or disable universal newline handling for readline()."""
        self.univ_newlines = univ_newlines

        # pick line separator char(s) based on universal newlines flag
        self.nlSeps = ("\n", )
        if self.univ_newlines:
            self.nlSeps = ("\r\n", "\r", "\n")

    def __iter__(self):
        return self

    def next(self):
        """Return the next line, raising StopIteration at end of data."""
        nextline = self.readline()
        if not nextline:
            raise StopIteration()

        return nextline

    def close(self):
        """Mark this object as closed (only sets the closed flag)."""
        self.closed = True

    def _checkfornewline(self):
        """Locate the earliest line separator in the line buffer.

        Returns (index, separator length), or (-1, -1) when the buffer
        holds no separator.
        """
        nl, nllen = -1, -1
        if self.linebuffer:
            # ugly check for cases where half of an \r\n pair was
            # read on the last pass, and the \r was discarded.  In this
            # case we just throw away the \n at the start of the buffer.
            if (self.lastdiscard, self.linebuffer[0]) == ('\r','\n'):
                self.linebuffer = self.linebuffer[1:]
            # The character after the discarded separator has now been
            # seen, so the pending state is resolved.  Leaving it set
            # could swallow a later, unrelated "\n".
            self.lastdiscard = ''

            # Pick the separator that occurs first in the buffer.  On a
            # tie the one listed first in nlSeps wins, so "\r\n" beats "\r".
            for sep in self.nlSeps:
                pos = self.linebuffer.find(sep)
                if pos >= 0 and (nl < 0 or pos < nl):
                    nl, nllen = pos, len(sep)

        return nl, nllen

    def readline(self, size = -1):
        """Read a line of at most size characters.  If size is negative,
        read a whole line.

        A complete line is returned with "\\n" as its terminator.  When
        the size limit is reached before the end of the line, the
        characters read so far are returned without a terminator and the
        remainder stays buffered for the next call.
        """
        if size < 0:
            size = sys.maxint
        elif size == 0:
            return ''
        limit = size

        # check for a newline already in buffer
        nl, nllen = self._checkfornewline()

        if nl < 0:
            # no line break in buffer - try to read more
            remaining = limit - len(self.linebuffer)
            while nl < 0 and remaining > 0:
                buf = self.read(min(remaining, 100))
                if not buf:
                    break
                self.linebuffer += buf
                remaining -= len(buf)

                # check for a newline in buffer
                nl, nllen = self._checkfornewline()

        if nl < 0 or nl >= limit:
            # we either ran out of bytes in the file, or met the specified
            # size limit before the end of the line: return what fits and
            # keep the rest buffered, so no data is dropped and no
            # newline is invented.
            s = self.linebuffer[:limit]
            self.linebuffer = self.linebuffer[limit:]
            return s

        buf = self.linebuffer[:nl]
        self.lastdiscard = self.linebuffer[nl:nl + nllen]
        self.linebuffer = self.linebuffer[nl + nllen:]

        # line is always returned with \n as newline char (except possibly
        # for a final incomplete line in the file, which is handled above).
        return buf + "\n"

    def readlines(self, sizehint = -1):
        """Return a list with all (following) lines. The sizehint parameter
        is ignored in this implementation.
        """
        result = []
        while True:
            line = self.readline()
            if not line:
                break
            result.append(line)
        return result

    def read(self, size = None):
        """Read and return up to size characters of member data.

        With size None or negative, everything currently obtainable is
        returned; with size 0 an empty string is returned.
        """
        # act like file() obj and return empty string if size is 0
        if size == 0:
            return ''

        # a negative size means "no limit", like file.read(); without this
        # the final slice below would cut off the last character
        if size is not None and size < 0:
            size = None

        # determine read size
        bytesToRead = self.compress_size - self.bytes_read

        # adjust read size for encrypted files since the first 12 bytes
        # are for the encryption/password information
        if self.decrypter is not None:
            bytesToRead -= 12

        if size is not None:
            if self.compress_type == ZIP_STORED:
                lr = len(self.readbuffer)
                bytesToRead = min(bytesToRead, size - lr)
            elif self.compress_type == ZIP_DEFLATED:
                if len(self.readbuffer) > size:
                    # the user has requested fewer bytes than we've already
                    # pulled through the decompressor; don't read any more
                    bytesToRead = 0
                else:
                    # user will use up the buffer, so read some more
                    lr = len(self.rawbuffer)
                    bytesToRead = min(bytesToRead, self.compreadsize - lr)

        # avoid reading past end of file contents
        if bytesToRead + self.bytes_read > self.compress_size:
            bytesToRead = self.compress_size - self.bytes_read

        # try to read from file (if necessary)
        if bytesToRead > 0:
            chunk = self.fileobj.read(bytesToRead)
            self.bytes_read += len(chunk)
            self.rawbuffer += chunk

            # handle contents of raw buffer
            if self.rawbuffer:
                newdata = self.rawbuffer
                self.rawbuffer = ''

                # decrypt new data if we were given an object to handle that
                if newdata and self.decrypter is not None:
                    newdata = ''.join(map(self.decrypter, newdata))

                # decompress newly read data if necessary
                if newdata and self.compress_type == ZIP_DEFLATED:
                    newdata = self.dc.decompress(newdata)
                    self.rawbuffer = self.dc.unconsumed_tail
                    # NOTE(review): nothing in this class sets self.eof to
                    # True, so this flush looks unreachable unless a caller
                    # sets it -- confirm.
                    if self.eof and len(self.rawbuffer) == 0:
                        # we're out of raw bytes (both from the file and
                        # the local buffer); flush just to make sure the
                        # decompressor is done
                        newdata += self.dc.flush()
                        # prevent decompressor from being used again
                        self.dc = None

                self.readbuffer += newdata

        # return what the user asked for
        if size is None or len(self.readbuffer) <= size:
            result = self.readbuffer
            self.readbuffer = ''
        else:
            result = self.readbuffer[:size]
            self.readbuffer = self.readbuffer[size:]

        return result
Tim Petersea5962f2007-03-12 18:07:52 +0000570
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000571
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000572class ZipFile:
Tim Petersa19a1682001-03-29 04:36:09 +0000573 """ Class with methods to open, read, write, close, list zip files.
574
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000575 z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=True)
Tim Petersa19a1682001-03-29 04:36:09 +0000576
Fred Drake3d9091e2001-03-26 15:49:24 +0000577 file: Either the path to the file, or a file-like object.
578 If it is a path, the file will be opened and closed by ZipFile.
579 mode: The mode can be either read "r", write "w" or append "a".
580 compression: ZIP_STORED (no compression) or ZIP_DEFLATED (requires zlib).
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000581 allowZip64: if True ZipFile will create files with ZIP64 extensions when
582 needed, otherwise it will raise an exception when this would
583 be necessary.
584
Fred Drake3d9091e2001-03-26 15:49:24 +0000585 """
Fred Drake484d7352000-10-02 21:14:52 +0000586
Fred Drake90eac282001-02-28 05:29:34 +0000587 fp = None # Set here since __del__ checks it
588
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000589 def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=False):
Fred Drake484d7352000-10-02 21:14:52 +0000590 """Open the ZIP file with mode read "r", write "w" or append "a"."""
Georg Brandl4b3ab6f2007-07-12 09:59:22 +0000591 if mode not in ("r", "w", "a"):
592 raise RuntimeError('ZipFile() requires mode "r", "w", or "a"')
593
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000594 if compression == ZIP_STORED:
595 pass
596 elif compression == ZIP_DEFLATED:
597 if not zlib:
598 raise RuntimeError,\
Fred Drake5db246d2000-09-29 20:44:48 +0000599 "Compression requires the (missing) zlib module"
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000600 else:
601 raise RuntimeError, "That compression method is not supported"
Georg Brandl4b3ab6f2007-07-12 09:59:22 +0000602
603 self._allowZip64 = allowZip64
604 self._didModify = False
Tim Peterse1190062001-01-15 03:34:38 +0000605 self.debug = 0 # Level of printing: 0 through 3
606 self.NameToInfo = {} # Find file info given name
607 self.filelist = [] # List of ZipInfo instances for archive
608 self.compression = compression # Method of compression
Raymond Hettinger2ca7c192005-02-16 09:27:49 +0000609 self.mode = key = mode.replace('b', '')[0]
Martin v. Löwisc6d626e2007-02-13 09:49:38 +0000610 self.pwd = None
Tim Petersa19a1682001-03-29 04:36:09 +0000611
Fred Drake3d9091e2001-03-26 15:49:24 +0000612 # Check if we were passed a file-like object
Walter Dörwald65230a22002-06-03 15:58:32 +0000613 if isinstance(file, basestring):
Fred Drake3d9091e2001-03-26 15:49:24 +0000614 self._filePassed = 0
615 self.filename = file
616 modeDict = {'r' : 'rb', 'w': 'wb', 'a' : 'r+b'}
Martin v. Löwis84f6de92007-02-13 10:10:39 +0000617 try:
618 self.fp = open(file, modeDict[mode])
619 except IOError:
620 if mode == 'a':
621 mode = key = 'w'
622 self.fp = open(file, modeDict[mode])
623 else:
624 raise
Fred Drake3d9091e2001-03-26 15:49:24 +0000625 else:
626 self._filePassed = 1
627 self.fp = file
628 self.filename = getattr(file, 'name', None)
Tim Petersa19a1682001-03-29 04:36:09 +0000629
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000630 if key == 'r':
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000631 self._GetContents()
632 elif key == 'w':
Fred Drake3d9091e2001-03-26 15:49:24 +0000633 pass
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000634 elif key == 'a':
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000635 try: # See if file is a zip file
636 self._RealGetContents()
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000637 # seek to start of directory and overwrite
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000638 self.fp.seek(self.start_dir, 0)
639 except BadZipfile: # file is not a zip file, just append
640 self.fp.seek(0, 2)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000641 else:
Tim Peters7d3bad62001-04-04 18:56:49 +0000642 if not self._filePassed:
643 self.fp.close()
644 self.fp = None
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000645 raise RuntimeError, 'Mode must be "r", "w" or "a"'
646
647 def _GetContents(self):
Tim Peters7d3bad62001-04-04 18:56:49 +0000648 """Read the directory, making sure we close the file if the format
649 is bad."""
650 try:
651 self._RealGetContents()
652 except BadZipfile:
653 if not self._filePassed:
654 self.fp.close()
655 self.fp = None
656 raise
657
658 def _RealGetContents(self):
Fred Drake484d7352000-10-02 21:14:52 +0000659 """Read in the table of contents for the ZIP file."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000660 fp = self.fp
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000661 endrec = _EndRecData(fp)
662 if not endrec:
663 raise BadZipfile, "File is not a zip file"
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000664 if self.debug > 1:
665 print endrec
Tim Peterse1190062001-01-15 03:34:38 +0000666 size_cd = endrec[5] # bytes in central directory
667 offset_cd = endrec[6] # offset of central directory
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000668 self.comment = endrec[8] # archive comment
669 # endrec[9] is the offset of the "End of Central Dir" record
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000670 if endrec[9] > ZIP64_LIMIT:
671 x = endrec[9] - size_cd - 56 - 20
672 else:
673 x = endrec[9] - size_cd
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000674 # "concat" is zero, unless zip was concatenated to another file
675 concat = x - offset_cd
676 if self.debug > 2:
677 print "given, inferred, offset", offset_cd, x, concat
678 # self.start_dir: Position of start of central directory
679 self.start_dir = offset_cd + concat
680 fp.seek(self.start_dir, 0)
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000681 data = fp.read(size_cd)
682 fp = cStringIO.StringIO(data)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000683 total = 0
684 while total < size_cd:
685 centdir = fp.read(46)
686 total = total + 46
687 if centdir[0:4] != stringCentralDir:
688 raise BadZipfile, "Bad magic number for central directory"
689 centdir = struct.unpack(structCentralDir, centdir)
690 if self.debug > 2:
691 print centdir
Fred Drake3e038e52001-02-28 17:56:26 +0000692 filename = fp.read(centdir[_CD_FILENAME_LENGTH])
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000693 # Create ZipInfo instance to store file information
694 x = ZipInfo(filename)
Fred Drake3e038e52001-02-28 17:56:26 +0000695 x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
696 x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
697 total = (total + centdir[_CD_FILENAME_LENGTH]
698 + centdir[_CD_EXTRA_FIELD_LENGTH]
699 + centdir[_CD_COMMENT_LENGTH])
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000700 x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000701 (x.create_version, x.create_system, x.extract_version, x.reserved,
702 x.flag_bits, x.compress_type, t, d,
703 x.CRC, x.compress_size, x.file_size) = centdir[1:12]
704 x.volume, x.internal_attr, x.external_attr = centdir[15:18]
705 # Convert date/time code to (year, month, day, hour, min, sec)
Gregory P. Smith0c63fc22008-01-20 01:21:03 +0000706 x._raw_time = t
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000707 x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
Fred Drake414ca662000-06-13 18:49:53 +0000708 t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000709
710 x._decodeExtra()
711 x.header_offset = x.header_offset + concat
Martin v. Löwis471617d2008-05-05 17:16:58 +0000712 x.filename = x._decodeFilename()
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000713 self.filelist.append(x)
714 self.NameToInfo[x.filename] = x
715 if self.debug > 2:
716 print "total", total
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000717
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000718
def namelist(self):
    """Return the member file names, in the order of self.filelist."""
    return [entry.filename for entry in self.filelist]
725
def infolist(self):
    """Return the list of ZipInfo instances for the archive members.

    The returned list is self.filelist itself, not a copy.
    """
    records = self.filelist
    return records
730
def printdir(self):
    """Print one header line, then one line per member of self.filelist.

    Each member line shows the file name, the modification time taken
    from the first six fields of date_time, and the uncompressed size.
    """
    heading = ("File Name", "Modified ", "Size")
    print("%-46s %19s %12s" % heading)
    for member in self.filelist:
        stamp = "%d-%02d-%02d %02d:%02d:%02d" % member.date_time[:6]
        row = (member.filename, stamp, member.file_size)
        print("%-46s %s %12d" % row)
737
738 def testzip(self):
Fred Drake484d7352000-10-02 21:14:52 +0000739 """Read all the files and check the CRC."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000740 for zinfo in self.filelist:
741 try:
Tim Peterse1190062001-01-15 03:34:38 +0000742 self.read(zinfo.filename) # Check CRC-32
Raymond Hettingerc0fac962003-06-27 22:25:03 +0000743 except BadZipfile:
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000744 return zinfo.filename
745
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000746
def getinfo(self, name):
    """Return the ZipInfo instance registered under 'name'.

    Raises KeyError when self.NameToInfo has no entry for 'name'.
    """
    found = self.NameToInfo.get(name)
    if found is not None:
        return found

    raise KeyError(
        'There is no item named %r in the archive' % name)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000755
def setpassword(self, pwd):
    """Set default password for encrypted files.

    The value is stored on self.pwd, which open() uses whenever it is
    called without a password of its own.
    """
    self.pwd = pwd
759
def read(self, name, pwd=None):
    """Return the complete contents of member 'name' as a string.

    The member is opened through self.open() in mode "r" with the given
    password, and read in a single call.
    """
    member_file = self.open(name, "r", pwd)
    return member_file.read()
763
def open(self, name, mode="r", pwd=None):
    """Return file-like object for 'name'.

    'name' is a member name or a ZipInfo instance.  'mode' must be "r",
    "U" or "rU"; a mode containing "U" enables universal newlines on
    the returned object.  'pwd' is the password for an encrypted member
    and falls back to self.pwd when empty.

    Raises RuntimeError for an unsupported mode, an already closed
    archive, a missing password or a failed password check, and
    BadZipfile when the local file header has a bad magic number or
    names a different file than the central directory.  Errors raised
    by getinfo() for an unknown name propagate unchanged.
    """
    if mode not in ("r", "U", "rU"):
        raise RuntimeError('open() requires mode "r", "U", or "rU"')
    if not self.fp:
        raise RuntimeError(
            "Attempt to read ZIP archive that was already closed")

    # Make sure we have an info object.  This happens before any file is
    # opened so that an unknown name cannot leave a file handle behind.
    if isinstance(name, ZipInfo):
        # 'name' is already an info object
        zinfo = name
    else:
        # Get info object for name
        zinfo = self.getinfo(name)

    # Only open a new file for instances where we were not
    # given a file object in the constructor
    if self._filePassed:
        zef_file = self.fp
    else:
        zef_file = open(self.filename, 'rb')

    handed_over = False
    try:
        zef_file.seek(zinfo.header_offset, 0)

        # Skip the file header:
        fheader = zef_file.read(30)
        if fheader[0:4] != stringFileHeader:
            raise BadZipfile("Bad magic number for file header")

        fheader = struct.unpack(structFileHeader, fheader)
        fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
        if fheader[_FH_EXTRA_FIELD_LENGTH]:
            zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])

        if fname != zinfo.orig_filename:
            raise BadZipfile(
                'File name in directory "%s" and header "%s" differ.' % (
                    zinfo.orig_filename, fname))

        # check for encrypted flag & handle password
        zd = None
        if zinfo.flag_bits & 0x1:
            if not pwd:
                pwd = self.pwd
            if not pwd:
                raise RuntimeError("File %s is encrypted, "
                                   "password required for extraction" % name)

            zd = _ZipDecrypter(pwd)
            # The first 12 bytes in the cypher stream is an encryption
            # header used to strengthen the algorithm. The first 11 bytes
            # are completely random, while the 12th contains the MSB of
            # the CRC, or the MSB of the file time depending on the
            # header type, and is used to check the correctness of the
            # password.
            encryption_header = zef_file.read(12)
            h = [zd(c) for c in encryption_header[0:12]]
            if zinfo.flag_bits & 0x8:
                # compare against the file time from extended local headers
                check_byte = (zinfo._raw_time >> 8) & 0xff
            else:
                # compare against the CRC otherwise
                check_byte = (zinfo.CRC >> 24) & 0xff
            if ord(h[11]) != check_byte:
                raise RuntimeError("Bad password for file", name)

        # build and return a ZipExtFile
        if zd is None:
            zef = ZipExtFile(zef_file, zinfo)
        else:
            zef = ZipExtFile(zef_file, zinfo, zd)

        # set universal newlines on ZipExtFile if necessary
        if "U" in mode:
            zef.set_univ_newlines(True)
        handed_over = True
        return zef
    finally:
        # On failure, close the handle opened above.  A file object that
        # was passed to the constructor is never closed here.
        if not handed_over and not self._filePassed:
            zef_file.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000841
def extract(self, member, path=None, pwd=None):
    """Extract one member and return the result of _extract_member().

    `member' may be a filename or a ZipInfo object.  The member is
    written below `path', which defaults to the current working
    directory.  `pwd' is passed through to _extract_member().
    """
    zinfo = member
    if not isinstance(zinfo, ZipInfo):
        zinfo = self.getinfo(zinfo)

    destination = path
    if destination is None:
        destination = os.getcwd()

    return self._extract_member(zinfo, destination, pwd)
855
def extractall(self, path=None, members=None, pwd=None):
    """Extract every requested member by calling extract() on each.

    `members' defaults to everything returned by namelist(); `path'
    and `pwd' are handed to extract() unchanged.
    """
    wanted = members
    if wanted is None:
        wanted = self.namelist()

    for item in wanted:
        self.extract(item, path, pwd)
867
def _extract_member(self, member, targetpath, pwd):
    """Extract the ZipInfo object 'member' to a physical
    file on the path targetpath.

    The member name is made relative before it is joined to
    'targetpath': a drive letter, leading separators and any "." or
    ".." components are dropped, so the result always lies below
    'targetpath'.  A member whose name ends in "/" is a directory entry
    and is created as a directory.  Missing parent directories are
    created.  Returns the normalized path that was written.
    """
    # build the destination pathname, replacing
    # forward slashes to platform specific separators.
    arcname = member.filename.replace("/", os.path.sep)
    if os.path.altsep:
        arcname = arcname.replace(os.path.altsep, os.path.sep)

    # Member names come from the archive and cannot be trusted: strip
    # the drive and every empty, "." or ".." component so that nothing
    # can be written outside of targetpath.
    arcname = os.path.splitdrive(arcname)[1]
    arcname = os.path.sep.join(
        [part for part in arcname.split(os.path.sep)
         if part not in ("", os.path.curdir, os.path.pardir)])

    # join() and normpath() cope with a trailing separator on targetpath,
    # so it is not stripped by hand (doing so turned "/" into "").
    targetpath = os.path.normpath(os.path.join(targetpath, arcname))

    # Create all upper directories if necessary.
    upperdirs = os.path.dirname(targetpath)
    if upperdirs and not os.path.exists(upperdirs):
        os.makedirs(upperdirs)

    if member.filename[-1:] == "/":
        # Directory entry: there is no data to copy, and creating a
        # regular file here would block members stored below it.
        if not os.path.isdir(targetpath):
            os.mkdir(targetpath)
        return targetpath

    source = self.open(member, pwd=pwd)
    try:
        target = open(targetpath, "wb")
        try:
            shutil.copyfileobj(source, target)
        finally:
            target.close()
    finally:
        source.close()

    return targetpath
897
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000898 def _writecheck(self, zinfo):
Fred Drake484d7352000-10-02 21:14:52 +0000899 """Check for errors before writing a file to the archive."""
Raymond Hettinger54f02222002-06-01 14:18:47 +0000900 if zinfo.filename in self.NameToInfo:
Tim Peterse1190062001-01-15 03:34:38 +0000901 if self.debug: # Warning for duplicate names
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000902 print "Duplicate name:", zinfo.filename
903 if self.mode not in ("w", "a"):
904 raise RuntimeError, 'write() requires mode "w" or "a"'
905 if not self.fp:
906 raise RuntimeError, \
Fred Drake5db246d2000-09-29 20:44:48 +0000907 "Attempt to write ZIP archive that was already closed"
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000908 if zinfo.compress_type == ZIP_DEFLATED and not zlib:
909 raise RuntimeError, \
Fred Drake5db246d2000-09-29 20:44:48 +0000910 "Compression requires the (missing) zlib module"
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000911 if zinfo.compress_type not in (ZIP_STORED, ZIP_DEFLATED):
912 raise RuntimeError, \
Fred Drake5db246d2000-09-29 20:44:48 +0000913 "That compression method is not supported"
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000914 if zinfo.file_size > ZIP64_LIMIT:
915 if not self._allowZip64:
916 raise LargeZipFile("Filesize would require ZIP64 extensions")
917 if zinfo.header_offset > ZIP64_LIMIT:
918 if not self._allowZip64:
919 raise LargeZipFile("Zipfile size would require ZIP64 extensions")
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000920
def write(self, filename, arcname=None, compress_type=None):
    """Put the bytes from filename into the archive under the name
    arcname.

    'arcname' defaults to 'filename'; in both cases the drive letter
    and leading path separators are removed.  'compress_type' overrides
    self.compression for this member.  The local header is first
    written with zeroed CRC and sizes, which are patched in place once
    the data has been streamed.

    Raises RuntimeError when the archive is already closed, plus
    whatever _writecheck() raises for the new entry.
    """
    if not self.fp:
        raise RuntimeError(
            "Attempt to write to ZIP archive that was already closed")

    st = os.stat(filename)
    date_time = time.localtime(st.st_mtime)[0:6]
    # Create ZipInfo instance to store file information
    if arcname is None:
        arcname = filename
    arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
    while arcname[0] in (os.sep, os.altsep):
        arcname = arcname[1:]
    zinfo = ZipInfo(arcname, date_time)
    zinfo.external_attr = (st[0] & 0xFFFF) << 16      # Unix attributes
    if compress_type is None:
        zinfo.compress_type = self.compression
    else:
        zinfo.compress_type = compress_type

    zinfo.file_size = st.st_size
    zinfo.flag_bits = 0x00
    zinfo.header_offset = self.fp.tell()    # Start of header bytes

    self._writecheck(zinfo)
    self._didModify = True
    fp = open(filename, "rb")
    try:
        # Must overwrite CRC and sizes with correct data later
        zinfo.CRC = CRC = 0
        zinfo.compress_size = compress_size = 0
        zinfo.file_size = file_size = 0
        self.fp.write(zinfo.FileHeader())
        if zinfo.compress_type == ZIP_DEFLATED:
            cmpr = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                                    zlib.DEFLATED, -15)
        else:
            cmpr = None
        while 1:
            buf = fp.read(1024 * 8)
            if not buf:
                break
            file_size = file_size + len(buf)
            CRC = crc32(buf, CRC) & 0xffffffff
            if cmpr:
                buf = cmpr.compress(buf)
                compress_size = compress_size + len(buf)
            self.fp.write(buf)
    finally:
        # Close the source file even when reading, compressing or
        # writing fails part-way through.
        fp.close()
    if cmpr:
        buf = cmpr.flush()
        compress_size = compress_size + len(buf)
        self.fp.write(buf)
        zinfo.compress_size = compress_size
    else:
        zinfo.compress_size = file_size
    zinfo.CRC = CRC
    zinfo.file_size = file_size
    # Seek backwards and write CRC and file sizes
    position = self.fp.tell()       # Preserve current position in file
    self.fp.seek(zinfo.header_offset + 14, 0)
    self.fp.write(struct.pack("<LLL", zinfo.CRC, zinfo.compress_size,
                              zinfo.file_size))
    self.fp.seek(position, 0)
    self.filelist.append(zinfo)
    self.NameToInfo[zinfo.filename] = zinfo
989
def writestr(self, zinfo_or_arcname, bytes):
    """Write a file into the archive.  The contents is the string
    'bytes'.  'zinfo_or_arcname' is either a ZipInfo instance or
    the name of the file in the archive.

    When only a name is given, a ZipInfo is created with the current
    local time and self.compression.  If bit 0x08 of the entry's
    flag_bits is set, the CRC and both sizes are written again after
    the data.

    Raises RuntimeError when the archive is already closed, plus
    whatever _writecheck() raises for the entry.
    """
    if not isinstance(zinfo_or_arcname, ZipInfo):
        zinfo = ZipInfo(filename=zinfo_or_arcname,
                        date_time=time.localtime(time.time())[:6])
        zinfo.compress_type = self.compression
    else:
        zinfo = zinfo_or_arcname

    if not self.fp:
        raise RuntimeError(
            "Attempt to write to ZIP archive that was already closed")

    payload = bytes
    zinfo.file_size = len(payload)          # Uncompressed size
    zinfo.header_offset = self.fp.tell()    # Start of header bytes
    self._writecheck(zinfo)
    self._didModify = True
    zinfo.CRC = crc32(payload) & 0xffffffff # CRC-32 checksum
    if zinfo.compress_type == ZIP_DEFLATED:
        co = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                              zlib.DEFLATED, -15)
        payload = co.compress(payload) + co.flush()
        zinfo.compress_size = len(payload)  # Compressed size
    else:
        zinfo.compress_size = zinfo.file_size
    self.fp.write(zinfo.FileHeader())
    self.fp.write(payload)
    self.fp.flush()
    if zinfo.flag_bits & 0x08:
        # Write CRC and file sizes after the file data.  The CRC was
        # masked to an unsigned 32-bit value above, so it has to be
        # packed with the unsigned "L" code, exactly as write() does.
        self.fp.write(struct.pack("<LLL", zinfo.CRC, zinfo.compress_size,
                                  zinfo.file_size))
    self.filelist.append(zinfo)
    self.NameToInfo[zinfo.filename] = zinfo
1027
def __del__(self):
    """Finalizer: call close() in case the user forgot to."""
    self.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001031
def close(self):
    """Close the file, and for mode "w" and "a" write the ending
    records.

    Calling close() on an already closed archive does nothing.  The
    central directory and end-of-archive record are only written when
    the archive was modified; ZIP64 end records are added when the
    central directory starts beyond ZIP64_LIMIT.  The underlying file
    is closed only if this object opened it, and self.fp is reset to
    None even when writing the ending records fails.
    """
    if self.fp is None:
        return

    try:
        if self.mode in ("w", "a") and self._didModify: # write ending records
            count = 0
            pos1 = self.fp.tell()
            for zinfo in self.filelist:         # write central directory
                count = count + 1
                dt = zinfo.date_time
                dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
                dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
                extra = []
                if zinfo.file_size > ZIP64_LIMIT \
                        or zinfo.compress_size > ZIP64_LIMIT:
                    extra.append(zinfo.file_size)
                    extra.append(zinfo.compress_size)
                    file_size = 0xffffffff      # -1
                    compress_size = 0xffffffff  # -1
                else:
                    file_size = zinfo.file_size
                    compress_size = zinfo.compress_size

                if zinfo.header_offset > ZIP64_LIMIT:
                    extra.append(zinfo.header_offset)
                    header_offset = 0xffffffff  # -1 32 bit
                else:
                    header_offset = zinfo.header_offset

                extra_data = zinfo.extra
                if extra:
                    # Append a ZIP64 field to the extra's
                    extra_data = struct.pack(
                        '<HH' + 'Q'*len(extra),
                        1, 8*len(extra), *extra) + extra_data

                    extract_version = max(45, zinfo.extract_version)
                    create_version = max(45, zinfo.create_version)
                else:
                    extract_version = zinfo.extract_version
                    create_version = zinfo.create_version

                try:
                    filename, flag_bits = zinfo._encodeFilenameFlags()
                    centdir = struct.pack(structCentralDir,
                        stringCentralDir, create_version,
                        zinfo.create_system, extract_version, zinfo.reserved,
                        flag_bits, zinfo.compress_type, dostime, dosdate,
                        zinfo.CRC, compress_size, file_size,
                        len(filename), len(extra_data), len(zinfo.comment),
                        0, zinfo.internal_attr, zinfo.external_attr,
                        header_offset)
                except DeprecationWarning:
                    # Dump the values that could not be packed, then let
                    # the warning-turned-error propagate.
                    diagnostic = (structCentralDir,
                        stringCentralDir, create_version,
                        zinfo.create_system, extract_version, zinfo.reserved,
                        zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
                        zinfo.CRC, compress_size, file_size,
                        len(zinfo.filename), len(extra_data),
                        len(zinfo.comment),
                        0, zinfo.internal_attr, zinfo.external_attr,
                        header_offset)
                    sys.stderr.write("%s\n" % (diagnostic,))
                    raise
                self.fp.write(centdir)
                self.fp.write(filename)
                self.fp.write(extra_data)
                self.fp.write(zinfo.comment)

            pos2 = self.fp.tell()
            # Write end-of-zip-archive record
            if pos1 > ZIP64_LIMIT:
                # Need to write the ZIP64 end-of-archive records
                zip64endrec = struct.pack(
                    structEndArchive64, stringEndArchive64,
                    44, 45, 45, 0, 0, count, count, pos2 - pos1, pos1)
                self.fp.write(zip64endrec)

                zip64locrec = struct.pack(
                    structEndArchive64Locator,
                    stringEndArchive64Locator, 0, pos2, 1)
                self.fp.write(zip64locrec)

                endrec = struct.pack(structEndArchive, stringEndArchive,
                    0, 0, count, count, pos2 - pos1, 0xffffffff, 0)
                self.fp.write(endrec)

            else:
                endrec = struct.pack(structEndArchive, stringEndArchive,
                    0, 0, count, count, pos2 - pos1, pos1, 0)
                self.fp.write(endrec)
            self.fp.flush()
    finally:
        # Always release the handle: if writing the ending records
        # failed, a later close() (for instance from __del__) must be a
        # no-op rather than a second attempt on the same broken file.
        fp = self.fp
        self.fp = None
        if not self._filePassed:
            fp.close()
1127
1128
1129class PyZipFile(ZipFile):
Fred Drake484d7352000-10-02 21:14:52 +00001130 """Class to create ZIP archives with Python library files and packages."""
1131
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001132 def writepy(self, pathname, basename = ""):
1133 """Add all files from "pathname" to the ZIP archive.
1134
Fred Drake484d7352000-10-02 21:14:52 +00001135 If pathname is a package directory, search the directory and
1136 all package subdirectories recursively for all *.py and enter
1137 the modules into the archive. If pathname is a plain
1138 directory, listdir *.py and enter all modules. Else, pathname
1139 must be a Python *.py file and the module will be put into the
1140 archive. Added modules are always module.pyo or module.pyc.
1141 This method will compile the module.py into module.pyc if
1142 necessary.
1143 """
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001144 dir, name = os.path.split(pathname)
1145 if os.path.isdir(pathname):
1146 initname = os.path.join(pathname, "__init__.py")
1147 if os.path.isfile(initname):
1148 # This is a package directory, add it
1149 if basename:
1150 basename = "%s/%s" % (basename, name)
1151 else:
1152 basename = name
1153 if self.debug:
1154 print "Adding package in", pathname, "as", basename
1155 fname, arcname = self._get_codename(initname[0:-3], basename)
1156 if self.debug:
1157 print "Adding", arcname
1158 self.write(fname, arcname)
1159 dirlist = os.listdir(pathname)
1160 dirlist.remove("__init__.py")
1161 # Add all *.py files and package subdirectories
1162 for filename in dirlist:
1163 path = os.path.join(pathname, filename)
1164 root, ext = os.path.splitext(filename)
1165 if os.path.isdir(path):
1166 if os.path.isfile(os.path.join(path, "__init__.py")):
1167 # This is a package directory, add it
1168 self.writepy(path, basename) # Recursive call
1169 elif ext == ".py":
1170 fname, arcname = self._get_codename(path[0:-3],
1171 basename)
1172 if self.debug:
1173 print "Adding", arcname
1174 self.write(fname, arcname)
1175 else:
1176 # This is NOT a package directory, add its files at top level
1177 if self.debug:
1178 print "Adding files from directory", pathname
1179 for filename in os.listdir(pathname):
1180 path = os.path.join(pathname, filename)
1181 root, ext = os.path.splitext(filename)
1182 if ext == ".py":
1183 fname, arcname = self._get_codename(path[0:-3],
1184 basename)
1185 if self.debug:
1186 print "Adding", arcname
1187 self.write(fname, arcname)
1188 else:
1189 if pathname[-3:] != ".py":
1190 raise RuntimeError, \
Fred Drake5db246d2000-09-29 20:44:48 +00001191 'Files added with writepy() must end with ".py"'
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001192 fname, arcname = self._get_codename(pathname[0:-3], basename)
1193 if self.debug:
1194 print "Adding file", arcname
1195 self.write(fname, arcname)
1196
1197 def _get_codename(self, pathname, basename):
1198 """Return (filename, archivename) for the path.
1199
Fred Drake484d7352000-10-02 21:14:52 +00001200 Given a module name path, return the correct file path and
1201 archive name, compiling if necessary. For example, given
1202 /python/lib/string, return (/python/lib/string.pyc, string).
1203 """
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001204 file_py = pathname + ".py"
1205 file_pyc = pathname + ".pyc"
1206 file_pyo = pathname + ".pyo"
1207 if os.path.isfile(file_pyo) and \
Raymond Hettinger32200ae2002-06-01 19:51:15 +00001208 os.stat(file_pyo).st_mtime >= os.stat(file_py).st_mtime:
Tim Peterse1190062001-01-15 03:34:38 +00001209 fname = file_pyo # Use .pyo file
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001210 elif not os.path.isfile(file_pyc) or \
Raymond Hettinger32200ae2002-06-01 19:51:15 +00001211 os.stat(file_pyc).st_mtime < os.stat(file_py).st_mtime:
Fred Drake484d7352000-10-02 21:14:52 +00001212 import py_compile
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001213 if self.debug:
1214 print "Compiling", file_py
Martin v. Löwis0c6774d2003-01-15 11:51:06 +00001215 try:
1216 py_compile.compile(file_py, file_pyc, None, True)
1217 except py_compile.PyCompileError,err:
1218 print err.msg
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001219 fname = file_pyc
1220 else:
1221 fname = file_pyc
1222 archivename = os.path.split(fname)[1]
1223 if basename:
1224 archivename = "%s/%s" % (basename, archivename)
1225 return (fname, archivename)
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001226
1227
def main(args = None):
    """Command-line interface: list, test, extract or create a zipfile.

    'args' is the argument list without the program name and defaults
    to sys.argv[1:].  The first argument selects the action (-l, -t, -e
    or -c).  A missing or unknown action, or the wrong number of
    arguments for it, prints the usage text and exits with status 1.
    """
    import textwrap
    USAGE=textwrap.dedent("""\
        Usage:
        zipfile.py -l zipfile.zip # Show listing of a zipfile
        zipfile.py -t zipfile.zip # Test if a zipfile is valid
        zipfile.py -e zipfile.zip target # Extract zipfile into target dir
        zipfile.py -c zipfile.zip src ... # Create zipfile from sources
        """)
    if args is None:
        args = sys.argv[1:]

    if not args or args[0] not in ('-l', '-c', '-e', '-t'):
        print(USAGE)
        sys.exit(1)

    if args[0] == '-l':
        if len(args) != 2:
            print(USAGE)
            sys.exit(1)
        zf = ZipFile(args[1], 'r')
        try:
            zf.printdir()
        finally:
            zf.close()

    elif args[0] == '-t':
        if len(args) != 2:
            print(USAGE)
            sys.exit(1)
        zf = ZipFile(args[1], 'r')
        try:
            badfile = zf.testzip()
        finally:
            zf.close()
        # testzip() returns the name of the first member that failed its
        # check; report it instead of silently claiming success.
        if badfile:
            print("The following enclosed file is corrupted: %r" % badfile)
        print("Done testing")

    elif args[0] == '-e':
        if len(args) != 3:
            print(USAGE)
            sys.exit(1)

        zf = ZipFile(args[1], 'r')
        out = args[2]
        try:
            for path in zf.namelist():
                if path.startswith('./'):
                    tgt = os.path.join(out, path[2:])
                else:
                    tgt = os.path.join(out, path)

                # tgtdir is empty when extracting a top-level member into
                # '' (the current directory); there is nothing to create.
                tgtdir = os.path.dirname(tgt)
                if tgtdir and not os.path.exists(tgtdir):
                    os.makedirs(tgtdir)
                if path.endswith('/'):
                    # Directory entry: it was created above and has no
                    # data to write.
                    continue
                fp = open(tgt, 'wb')
                try:
                    fp.write(zf.read(path))
                finally:
                    fp.close()
        finally:
            zf.close()

    elif args[0] == '-c':
        if len(args) < 3:
            print(USAGE)
            sys.exit(1)

        def addToZip(zf, path, zippath):
            """Add a file, or a directory tree recursively, to zf."""
            if os.path.isfile(path):
                zf.write(path, zippath, ZIP_DEFLATED)
            elif os.path.isdir(path):
                for nm in os.listdir(path):
                    addToZip(zf,
                            os.path.join(path, nm), os.path.join(zippath, nm))
            # else: ignore

        zf = ZipFile(args[1], 'w', allowZip64=True)
        try:
            for src in args[2:]:
                addToZip(zf, src, os.path.basename(src))
        finally:
            zf.close()

if __name__ == "__main__":
    main()