blob: b601e7471e7eaf102eed7c0674b14b5495f28f22 [file] [log] [blame]
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001"""
2Read and write ZIP files.
3"""
Georg Brandl62416bc2008-01-07 18:47:44 +00004import struct, os, time, sys, shutil
Ronald Oussoren143cefb2006-06-15 08:14:18 +00005import binascii, cStringIO
Guido van Rossum32abe6f2000-03-31 17:30:02 +00006
7try:
Tim Peterse1190062001-01-15 03:34:38 +00008 import zlib # We may need its compression method
Gregory P. Smithb89a0962008-03-19 01:46:10 +00009 crc32 = zlib.crc32
Guido van Rossum9c673f32001-04-10 15:37:12 +000010except ImportError:
Guido van Rossum32abe6f2000-03-31 17:30:02 +000011 zlib = None
Gregory P. Smithb89a0962008-03-19 01:46:10 +000012 crc32 = binascii.crc32
Guido van Rossum32abe6f2000-03-31 17:30:02 +000013
# Names exported by a star-import of this module.
__all__ = ["BadZipfile", "error", "ZIP_STORED", "ZIP_DEFLATED", "is_zipfile",
           "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile" ]
Skip Montanaro40fc1602001-03-01 04:27:19 +000016
class BadZipfile(Exception):
    """Raised when a file cannot be processed as a ZIP archive."""
Ronald Oussoren143cefb2006-06-15 08:14:18 +000019
20
class LargeZipFile(Exception):
    """
    Raised when writing a zipfile that would need the ZIP64 extensions
    while those extensions are disabled.
    """
26
error = BadZipfile      # The exception raised by this module

# Sizes above this limit are written through the ZIP64 extra field by
# ZipInfo.FileHeader instead of the classic header fields.
ZIP64_LIMIT= (1 << 31) - 1

# constants for Zip file compression methods
ZIP_STORED = 0
ZIP_DEFLATED = 8
# Other ZIP compression methods not supported

# Here are some struct module formats for reading headers.
# All formats are little-endian ("<") with no padding.
# structEndArchive unpacks to 8 items; _EndRecData appends the archive
# comment and the record offset as items 9 and 10 of its result list.
structEndArchive = "<4s4H2LH"     # 8 items, end of archive, 22 bytes
stringEndArchive = "PK\005\006"   # magic number for end of archive record
structCentralDir = "<4s4B4HlLL5HLL"# 19 items, central directory, 46 bytes
stringCentralDir = "PK\001\002"   # magic number for central directory
structFileHeader = "<4s2B4HlLL2H" # 12 items, file header record, 30 bytes
stringFileHeader = "PK\003\004"   # magic number for file header
structEndArchive64Locator = "<4slql" # 4 items, locate Zip64 header, 20 bytes
stringEndArchive64Locator = "PK\x06\x07" # magic token for locator header
structEndArchive64 = "<4sqhhllqqqq" # 10 items, end of archive (Zip64), 56 bytes
stringEndArchive64 = "PK\x06\x06" # magic token for Zip64 header
47
Guido van Rossum32abe6f2000-03-31 17:30:02 +000048
# indexes of entries in the central directory structure
# (positions in the tuple produced by struct.unpack(structCentralDir, ...))
_CD_SIGNATURE = 0
_CD_CREATE_VERSION = 1
_CD_CREATE_SYSTEM = 2
_CD_EXTRACT_VERSION = 3
_CD_EXTRACT_SYSTEM = 4                  # is this meaningful?
_CD_FLAG_BITS = 5
_CD_COMPRESS_TYPE = 6
_CD_TIME = 7
_CD_DATE = 8
_CD_CRC = 9
_CD_COMPRESSED_SIZE = 10
_CD_UNCOMPRESSED_SIZE = 11
_CD_FILENAME_LENGTH = 12
_CD_EXTRA_FIELD_LENGTH = 13
_CD_COMMENT_LENGTH = 14
_CD_DISK_NUMBER_START = 15
_CD_INTERNAL_FILE_ATTRIBUTES = 16
_CD_EXTERNAL_FILE_ATTRIBUTES = 17
_CD_LOCAL_HEADER_OFFSET = 18

# indexes of entries in the local file header structure
# (presumably positions in a struct.unpack(structFileHeader, ...) tuple;
# the code that uses them is outside this part of the file)
_FH_SIGNATURE = 0
_FH_EXTRACT_VERSION = 1
_FH_EXTRACT_SYSTEM = 2                  # is this meaningful?
_FH_GENERAL_PURPOSE_FLAG_BITS = 3
_FH_COMPRESSION_METHOD = 4
_FH_LAST_MOD_TIME = 5
_FH_LAST_MOD_DATE = 6
_FH_CRC = 7
_FH_COMPRESSED_SIZE = 8
_FH_UNCOMPRESSED_SIZE = 9
_FH_FILENAME_LENGTH = 10
_FH_EXTRA_FIELD_LENGTH = 11
83
def is_zipfile(filename):
    """Quickly see if file is a ZIP file by checking the magic number.

    filename -- path of the file to inspect

    Returns True when _EndRecData() locates an end-of-archive record in
    the file, and False otherwise.  An IOError while opening or reading
    the file is treated as "not a ZIP file" and also yields False.
    """
    try:
        fpin = open(filename, "rb")
        try:
            endrec = _EndRecData(fpin)
        finally:
            # Always release the handle, even when _EndRecData() raises.
            fpin.close()
        if endrec:
            return True                 # file has correct magic number
    except IOError:
        pass
    return False
Guido van Rossum32abe6f2000-03-31 17:30:02 +000095
def _EndRecData64(fpin, offset, endrec):
    """
    Read the ZIP64 end-of-archive records and use that to update endrec

    fpin   -- seekable file object for the archive
    offset -- position of the classic end-of-archive record, given
              relative to the end of the file (it is used with whence=2)
    endrec -- list built by _EndRecData(); items 1-6 are overwritten in
              place with the ZIP64 values

    Returns endrec.  It is returned unchanged when the file is too small
    to hold the ZIP64 records or when their signatures do not match.
    Raises BadZipfile when the locator describes a multi-disk archive.
    """
    locatorSize = struct.calcsize(structEndArchive64Locator)
    try:
        fpin.seek(offset - locatorSize, 2)
    except IOError:
        # Seeking before the start of the file: there is no room for a
        # ZIP64 locator, so keep the classic record as it is.
        return endrec
    data = fpin.read(locatorSize)
    if len(data) != locatorSize:
        return endrec
    sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, data)
    if sig != stringEndArchive64Locator:
        return endrec

    if diskno != 0 or disks != 1:
        raise BadZipfile("zipfiles that span multiple disks are not supported")

    # Assume no 'zip64 extensible data'
    endArchiveSize = struct.calcsize(structEndArchive64)
    try:
        fpin.seek(offset - locatorSize - endArchiveSize, 2)
    except IOError:
        return endrec
    data = fpin.read(endArchiveSize)
    if len(data) != endArchiveSize:
        return endrec
    sig, sz, create_version, read_version, disk_num, disk_dir, \
            dircount, dircount2, dirsize, diroffset = \
            struct.unpack(structEndArchive64, data)
    if sig != stringEndArchive64:
        return endrec

    # Update the original endrec using data from the ZIP64 record
    endrec[1] = disk_num
    endrec[2] = disk_dir
    endrec[3] = dircount
    endrec[4] = dircount2
    endrec[5] = dirsize
    endrec[6] = diroffset
    return endrec
128
129
def _EndRecData(fpin):
    """Return data from the "End of Central Directory" record, or None.

    fpin -- seekable file object for the archive

    The result is a list: the eight fields unpacked from the ZIP "End of
    central dir" record, then the archive comment, then the file seek
    offset of the record (ten items in total).  When the central
    directory offset field holds the ZIP64 marker value the list is
    passed through _EndRecData64() before being returned.

    None is returned when no valid record is found, including when the
    file is shorter than one record.
    """
    recordSize = struct.calcsize(structEndArchive)
    fpin.seek(0, 2)
    filesize = fpin.tell()
    if filesize < recordSize:
        # Too short to contain an end-of-archive record at all.
        return None

    fpin.seek(-recordSize, 2)               # Assume no archive comment.
    data = fpin.read()
    if data[0:4] == stringEndArchive and data[-2:] == "\000\000":
        endrec = struct.unpack(structEndArchive, data)
        endrec = list(endrec)
        endrec.append("")                   # Append the archive comment
        endrec.append(filesize - recordSize)  # Append the record start offset
        if endrec[-4] == -1 or endrec[-4] == 0xffffffff:
            return _EndRecData64(fpin, -recordSize, endrec)
        return endrec
    # Search the last END_BLOCK bytes of the file for the record signature.
    # The comment is appended to the ZIP file and has a 16 bit length.
    # So the comment may be up to 64K long; the search window covers the
    # largest possible comment plus the record itself.
    # Also, the signature must not appear in the comment.
    END_BLOCK = min(filesize, (1 << 16) + recordSize)
    fpin.seek(filesize - END_BLOCK, 0)
    data = fpin.read()
    start = data.rfind(stringEndArchive)
    if start >= 0:     # Correct signature string was found
        record = data[start:start+recordSize]
        if len(record) != recordSize:
            # Signature sits too close to the end of the file to be
            # followed by a complete record.
            return None
        endrec = struct.unpack(structEndArchive, record)
        endrec = list(endrec)
        comment = data[start+recordSize:]
        if endrec[7] == len(comment):     # Comment length checks out
            # Append the archive comment and start offset
            endrec.append(comment)
            endrec.append(filesize - END_BLOCK + start)
            if endrec[-4] == -1 or endrec[-4] == 0xffffffff:
                return _EndRecData64(fpin, - END_BLOCK + start, endrec)
            return endrec
    return      # Error, return None
167
Fred Drake484d7352000-10-02 21:14:52 +0000168
class ZipInfo (object):
    """Class with attributes describing each file in the ZIP archive."""

    __slots__ = (
            'orig_filename',
            'filename',
            'date_time',
            'compress_type',
            'comment',
            'extra',
            'create_system',
            'create_version',
            'extract_version',
            'reserved',
            'flag_bits',
            'volume',
            'internal_attr',
            'external_attr',
            'header_offset',
            'CRC',
            'compress_size',
            'file_size',
            '_raw_time',
        )

    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
        """Create an entry with default header values.

        filename  -- member name; it is cut at the first null byte and
                     os.sep is replaced by "/" before being stored
        date_time -- (year, month, day, hour, min, sec) sequence
        """
        self.orig_filename = filename   # Original file name in archive

        # Terminate the file name at the first null byte.  Null bytes in file
        # names are used as tricks by viruses in archives.
        null_byte = filename.find(chr(0))
        if null_byte >= 0:
            filename = filename[0:null_byte]
        # This is used to ensure paths in generated ZIP files always use
        # forward slashes as the directory separator, as required by the
        # ZIP format specification.
        if os.sep != "/" and os.sep in filename:
            filename = filename.replace(os.sep, "/")

        self.filename = filename        # Normalized file name
        self.date_time = date_time      # year, month, day, hour, min, sec
        # Standard values:
        self.compress_type = ZIP_STORED # Type of compression for the file
        self.comment = ""               # Comment for each file
        self.extra = ""                 # ZIP extra data
        if sys.platform == 'win32':
            self.create_system = 0          # System which created ZIP archive
        else:
            # Assume everything else is unix-y
            self.create_system = 3          # System which created ZIP archive
        self.create_version = 20        # Version which created ZIP archive
        self.extract_version = 20       # Version needed to extract archive
        self.reserved = 0               # Must be zero
        self.flag_bits = 0              # ZIP flag bits
        self.volume = 0                 # Volume number of file header
        self.internal_attr = 0          # Internal attributes
        self.external_attr = 0          # External file attributes
        # Other attributes are set by class ZipFile:
        # header_offset         Byte offset to the file header
        # CRC                   CRC-32 of the uncompressed file
        # compress_size         Size of the compressed file
        # file_size             Size of the uncompressed file

    def FileHeader(self):
        """Return the per-file header as a string.

        The result is the packed structFileHeader record followed by the
        file name and the extra data.  When either size exceeds
        ZIP64_LIMIT, a ZIP64 extra record carrying the real sizes is
        appended, the header size fields are set to 0xffffffff, and
        extract_version/create_version are raised to at least 45 on this
        object as a side effect.
        """
        dt = self.date_time
        # Pack the date and time into the two 16-bit DOS fields.
        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
        if self.flag_bits & 0x08:
            # Set these to zero because we write them after the file data
            CRC = compress_size = file_size = 0
        else:
            CRC = self.CRC
            compress_size = self.compress_size
            file_size = self.file_size

        extra = self.extra

        if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
            # File is larger than what fits into a 4 byte integer,
            # fall back to the ZIP64 extension
            fmt = '<hhqq'
            extra = extra + struct.pack(fmt,
                    1, struct.calcsize(fmt)-4, file_size, compress_size)
            file_size = 0xffffffff # -1
            compress_size = 0xffffffff # -1
            self.extract_version = max(45, self.extract_version)
            self.create_version = max(45, self.extract_version)

        header = struct.pack(structFileHeader, stringFileHeader,
                 self.extract_version, self.reserved, self.flag_bits,
                 self.compress_type, dostime, dosdate, CRC,
                 compress_size, file_size,
                 len(self.filename), len(extra))
        return header + self.filename + extra

    def _decodeExtra(self):
        """Apply the ZIP64 record (type 1) found in self.extra, if any.

        file_size, compress_size and header_offset are replaced, in that
        order, by the 64-bit values of the record, but only for those
        attributes that currently hold the marker value (0xFFFFFFFF or -1).
        Records of any other type are skipped.

        Raises RuntimeError when a type 1 record has an unexpected length.
        """
        extra = self.extra
        unpack = struct.unpack
        # Each record starts with a 4-byte (type, length) header; anything
        # shorter than that at the end cannot be a record and is ignored.
        while len(extra) >= 4:
            # Both fields are unsigned.  Reading them as signed would turn
            # lengths of 0x8000 and above into negative numbers, so the
            # slice at the bottom of the loop could stop advancing.
            tp, ln = unpack('<HH', extra[:4])
            if tp == 1:
                if ln >= 24:
                    counts = unpack('<qqq', extra[4:28])
                elif ln == 16:
                    counts = unpack('<qq', extra[4:20])
                elif ln == 8:
                    counts = unpack('<q', extra[4:12])
                elif ln == 0:
                    counts = ()
                else:
                    raise RuntimeError("Corrupt extra field %s"%(ln,))

                idx = 0

                # ZIP64 extension (large files and/or large archives)
                if self.file_size == -1 or self.file_size == 0xFFFFFFFF:
                    self.file_size = counts[idx]
                    idx += 1

                if self.compress_size == -1 or self.compress_size == 0xFFFFFFFF:
                    self.compress_size = counts[idx]
                    idx += 1

                if self.header_offset == -1 or self.header_offset == 0xffffffff:
                    self.header_offset = counts[idx]
                    idx+=1

            extra = extra[ln+4:]
Tim Petersa608bb22006-06-15 18:06:29 +0000300
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000301
Martin v. Löwisc6d626e2007-02-13 09:49:38 +0000302class _ZipDecrypter:
303 """Class to handle decryption of files stored within a ZIP archive.
304
305 ZIP supports a password-based form of encryption. Even though known
306 plaintext attacks have been found against it, it is still useful
Gregory P. Smithda407232008-01-20 01:32:00 +0000307 to be able to get data out of such a file.
Martin v. Löwisc6d626e2007-02-13 09:49:38 +0000308
309 Usage:
310 zd = _ZipDecrypter(mypwd)
311 plain_char = zd(cypher_char)
312 plain_text = map(zd, cypher_text)
313 """
314
315 def _GenerateCRCTable():
316 """Generate a CRC-32 table.
317
318 ZIP encryption uses the CRC32 one-byte primitive for scrambling some
319 internal keys. We noticed that a direct implementation is faster than
320 relying on binascii.crc32().
321 """
322 poly = 0xedb88320
323 table = [0] * 256
324 for i in range(256):
325 crc = i
326 for j in range(8):
327 if crc & 1:
328 crc = ((crc >> 1) & 0x7FFFFFFF) ^ poly
329 else:
330 crc = ((crc >> 1) & 0x7FFFFFFF)
331 table[i] = crc
332 return table
333 crctable = _GenerateCRCTable()
334
335 def _crc32(self, ch, crc):
336 """Compute the CRC32 primitive on one byte."""
337 return ((crc >> 8) & 0xffffff) ^ self.crctable[(crc ^ ord(ch)) & 0xff]
338
339 def __init__(self, pwd):
340 self.key0 = 305419896
341 self.key1 = 591751049
342 self.key2 = 878082192
343 for p in pwd:
344 self._UpdateKeys(p)
345
346 def _UpdateKeys(self, c):
347 self.key0 = self._crc32(c, self.key0)
348 self.key1 = (self.key1 + (self.key0 & 255)) & 4294967295
349 self.key1 = (self.key1 * 134775813 + 1) & 4294967295
350 self.key2 = self._crc32(chr((self.key1 >> 24) & 255), self.key2)
351
352 def __call__(self, c):
353 """Decrypt a single character."""
354 c = ord(c)
355 k = self.key2 | 2
356 c = c ^ (((k * (k^1)) >> 8) & 255)
357 c = chr(c)
358 self._UpdateKeys(c)
359 return c
360
class ZipExtFile:
    """File-like object for reading an archive member.
       Is returned by ZipFile.open().
    """

    def __init__(self, fileobj, zipinfo, decrypt=None):
        """Wrap fileobj for reading the member described by zipinfo.

        fileobj -- file object the compressed data is read from
                   (presumably already positioned at the member's data;
                   the caller is outside this part of the file)
        zipinfo -- provides compress_type, compress_size and filename
        decrypt -- optional callable applied to every raw character
        """
        self.fileobj = fileobj
        self.decrypter = decrypt
        self.bytes_read = 0
        self.rawbuffer = ''
        self.readbuffer = ''
        self.linebuffer = ''
        self.eof = False
        self.univ_newlines = False
        self.nlSeps = ("\n", )
        self.lastdiscard = ''

        self.compress_type = zipinfo.compress_type
        self.compress_size = zipinfo.compress_size

        self.closed  = False
        self.mode    = "r"
        self.name = zipinfo.filename

        # read from compressed files in 64k blocks
        self.compreadsize = 64*1024
        if self.compress_type == ZIP_DEFLATED:
            self.dc = zlib.decompressobj(-15)

    def set_univ_newlines(self, univ_newlines):
        """Enable or disable universal newline handling for readline()."""
        self.univ_newlines = univ_newlines

        # pick line separator char(s) based on universal newlines flag
        self.nlSeps = ("\n", )
        if self.univ_newlines:
            self.nlSeps = ("\r\n", "\r", "\n")

    def __iter__(self):
        return self

    def next(self):
        """Return the next line; raise StopIteration at end of data."""
        nextline = self.readline()
        if not nextline:
            raise StopIteration()

        return nextline

    def close(self):
        """Mark the object as closed (the underlying file is untouched)."""
        self.closed = True

    def _checkfornewline(self):
        """Locate the earliest line separator in the line buffer.

        Returns (position, separator length), or (-1, -1) when the buffer
        holds no separator.
        """
        nl, nllen = -1, -1
        if self.linebuffer:
            # ugly check for cases where half of an \r\n pair was
            # read on the last pass, and the \r was discarded.  In this
            # case we just throw away the \n at the start of the buffer.
            if (self.lastdiscard, self.linebuffer[0]) == ('\r','\n'):
                self.linebuffer = self.linebuffer[1:]
                # The pair is now complete; forget the \r so that a later
                # line starting with \n is not swallowed as well.
                self.lastdiscard = ''

            for sep in self.nlSeps:
                pos = self.linebuffer.find(sep)
                # Keep the separator that occurs first in the buffer.  On
                # a tie the strict '<' keeps the one listed first in
                # nlSeps, i.e. "\r\n" wins over a lone "\r".
                if pos >= 0 and (nl < 0 or pos < nl):
                    nl, nllen = pos, len(sep)

        return nl, nllen

    def readline(self, size = -1):
        """Read a line with approx. size. If size is negative,
           read a whole line.

        A complete line is returned with "\\n" as its terminator, whatever
        separator was found.  When the size limit is reached first, the
        characters read so far are returned without a terminator and the
        rest of the line stays buffered for the next call.
        """
        if size < 0:
            size = sys.maxint
        elif size == 0:
            return ''
        limit = size

        # check for a newline already in buffer
        nl, nllen = self._checkfornewline()

        if nl < 0:
            # no line break in buffer - try to read more
            size -= len(self.linebuffer)
            while nl < 0 and size > 0:
                buf = self.read(min(size, 100))
                if not buf:
                    break
                self.linebuffer += buf
                size -= len(buf)

                # check for a newline in buffer
                nl, nllen = self._checkfornewline()

        # we either ran out of bytes in the file, or met the specified
        # size limit before reaching the end of the line, so hand back
        # what fits and keep the remainder buffered
        if nl < 0 or nl > limit:
            s = self.linebuffer[:limit]
            self.linebuffer = self.linebuffer[limit:]
            # No separator was consumed by this call.
            self.lastdiscard = ''
            return s

        buf = self.linebuffer[:nl]
        self.lastdiscard = self.linebuffer[nl:nl + nllen]
        self.linebuffer = self.linebuffer[nl + nllen:]

        # line is always returned with \n as newline char (except possibly
        # for a final incomplete line in the file, which is handled above).
        return buf + "\n"

    def readlines(self, sizehint = -1):
        """Return a list with all (following) lines. The sizehint parameter
        is ignored in this implementation.
        """
        result = []
        while True:
            line = self.readline()
            if not line: break
            result.append(line)
        return result

    def read(self, size = None):
        """Return up to size characters of member data.

        With size None or negative, everything that is still available is
        returned.  An empty string is returned for size 0.
        """
        # act like file() obj and return empty string if size is 0
        if size == 0:
            return ''
        # a negative size means "no limit", exactly like None
        if size is not None and size < 0:
            size = None

        # determine read size
        bytesToRead = self.compress_size - self.bytes_read

        # adjust read size for encrypted files since the first 12 bytes
        # are for the encryption/password information
        if self.decrypter is not None:
            bytesToRead -= 12

        # everything of this member that has not been read from the file
        remaining = bytesToRead

        if size is not None:
            if self.compress_type == ZIP_STORED:
                lr = len(self.readbuffer)
                bytesToRead = min(bytesToRead, size - lr)
            elif self.compress_type == ZIP_DEFLATED:
                if len(self.readbuffer) > size:
                    # the user has requested fewer bytes than we've already
                    # pulled through the decompressor; don't read any more
                    bytesToRead = 0
                else:
                    # user will use up the buffer, so read some more
                    lr = len(self.rawbuffer)
                    bytesToRead = min(bytesToRead, self.compreadsize - lr)

        # avoid reading past end of file contents
        if bytesToRead + self.bytes_read > self.compress_size:
            bytesToRead = self.compress_size - self.bytes_read

        # try to read from file (if necessary)
        if bytesToRead > 0:
            chunk = self.fileobj.read(bytesToRead)
            self.bytes_read += len(chunk)
            self.rawbuffer += chunk
            if len(chunk) >= remaining:
                # the last compressed bytes of the member were just read
                self.eof = True

            # handle contents of raw buffer
            if self.rawbuffer:
                newdata = self.rawbuffer
                self.rawbuffer = ''

                # decrypt new data if we were given an object to handle that
                if newdata and self.decrypter is not None:
                    newdata = ''.join(map(self.decrypter, newdata))

                # decompress newly read data if necessary
                if newdata and self.compress_type == ZIP_DEFLATED:
                    newdata = self.dc.decompress(newdata)
                    self.rawbuffer = self.dc.unconsumed_tail
                    if self.eof and len(self.rawbuffer) == 0:
                        # we're out of raw bytes (both from the file and
                        # the local buffer); flush just to make sure the
                        # decompressor is done
                        newdata += self.dc.flush()
                        # prevent decompressor from being used again
                        self.dc = None

                self.readbuffer += newdata

        # return what the user asked for
        if size is None or len(self.readbuffer) <= size:
            result = self.readbuffer
            self.readbuffer = ''
        else:
            result = self.readbuffer[:size]
            self.readbuffer = self.readbuffer[size:]

        return result
Tim Petersea5962f2007-03-12 18:07:52 +0000553
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000554
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000555class ZipFile:
Tim Petersa19a1682001-03-29 04:36:09 +0000556 """ Class with methods to open, read, write, close, list zip files.
557
    z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=False)
Tim Petersa19a1682001-03-29 04:36:09 +0000559
Fred Drake3d9091e2001-03-26 15:49:24 +0000560 file: Either the path to the file, or a file-like object.
561 If it is a path, the file will be opened and closed by ZipFile.
562 mode: The mode can be either read "r", write "w" or append "a".
563 compression: ZIP_STORED (no compression) or ZIP_DEFLATED (requires zlib).
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000564 allowZip64: if True ZipFile will create files with ZIP64 extensions when
565 needed, otherwise it will raise an exception when this would
566 be necessary.
567
Fred Drake3d9091e2001-03-26 15:49:24 +0000568 """
Fred Drake484d7352000-10-02 21:14:52 +0000569
Fred Drake90eac282001-02-28 05:29:34 +0000570 fp = None # Set here since __del__ checks it
571
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000572 def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=False):
Fred Drake484d7352000-10-02 21:14:52 +0000573 """Open the ZIP file with mode read "r", write "w" or append "a"."""
Georg Brandl4b3ab6f2007-07-12 09:59:22 +0000574 if mode not in ("r", "w", "a"):
575 raise RuntimeError('ZipFile() requires mode "r", "w", or "a"')
576
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000577 if compression == ZIP_STORED:
578 pass
579 elif compression == ZIP_DEFLATED:
580 if not zlib:
581 raise RuntimeError,\
Fred Drake5db246d2000-09-29 20:44:48 +0000582 "Compression requires the (missing) zlib module"
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000583 else:
584 raise RuntimeError, "That compression method is not supported"
Georg Brandl4b3ab6f2007-07-12 09:59:22 +0000585
586 self._allowZip64 = allowZip64
587 self._didModify = False
Tim Peterse1190062001-01-15 03:34:38 +0000588 self.debug = 0 # Level of printing: 0 through 3
589 self.NameToInfo = {} # Find file info given name
590 self.filelist = [] # List of ZipInfo instances for archive
591 self.compression = compression # Method of compression
Raymond Hettinger2ca7c192005-02-16 09:27:49 +0000592 self.mode = key = mode.replace('b', '')[0]
Martin v. Löwisc6d626e2007-02-13 09:49:38 +0000593 self.pwd = None
Tim Petersa19a1682001-03-29 04:36:09 +0000594
Fred Drake3d9091e2001-03-26 15:49:24 +0000595 # Check if we were passed a file-like object
Walter Dörwald65230a22002-06-03 15:58:32 +0000596 if isinstance(file, basestring):
Fred Drake3d9091e2001-03-26 15:49:24 +0000597 self._filePassed = 0
598 self.filename = file
599 modeDict = {'r' : 'rb', 'w': 'wb', 'a' : 'r+b'}
Martin v. Löwis84f6de92007-02-13 10:10:39 +0000600 try:
601 self.fp = open(file, modeDict[mode])
602 except IOError:
603 if mode == 'a':
604 mode = key = 'w'
605 self.fp = open(file, modeDict[mode])
606 else:
607 raise
Fred Drake3d9091e2001-03-26 15:49:24 +0000608 else:
609 self._filePassed = 1
610 self.fp = file
611 self.filename = getattr(file, 'name', None)
Tim Petersa19a1682001-03-29 04:36:09 +0000612
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000613 if key == 'r':
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000614 self._GetContents()
615 elif key == 'w':
Fred Drake3d9091e2001-03-26 15:49:24 +0000616 pass
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000617 elif key == 'a':
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000618 try: # See if file is a zip file
619 self._RealGetContents()
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000620 # seek to start of directory and overwrite
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000621 self.fp.seek(self.start_dir, 0)
622 except BadZipfile: # file is not a zip file, just append
623 self.fp.seek(0, 2)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000624 else:
Tim Peters7d3bad62001-04-04 18:56:49 +0000625 if not self._filePassed:
626 self.fp.close()
627 self.fp = None
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000628 raise RuntimeError, 'Mode must be "r", "w" or "a"'
629
630 def _GetContents(self):
Tim Peters7d3bad62001-04-04 18:56:49 +0000631 """Read the directory, making sure we close the file if the format
632 is bad."""
633 try:
634 self._RealGetContents()
635 except BadZipfile:
636 if not self._filePassed:
637 self.fp.close()
638 self.fp = None
639 raise
640
641 def _RealGetContents(self):
Fred Drake484d7352000-10-02 21:14:52 +0000642 """Read in the table of contents for the ZIP file."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000643 fp = self.fp
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000644 endrec = _EndRecData(fp)
645 if not endrec:
646 raise BadZipfile, "File is not a zip file"
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000647 if self.debug > 1:
648 print endrec
Tim Peterse1190062001-01-15 03:34:38 +0000649 size_cd = endrec[5] # bytes in central directory
650 offset_cd = endrec[6] # offset of central directory
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000651 self.comment = endrec[8] # archive comment
652 # endrec[9] is the offset of the "End of Central Dir" record
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000653 if endrec[9] > ZIP64_LIMIT:
654 x = endrec[9] - size_cd - 56 - 20
655 else:
656 x = endrec[9] - size_cd
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000657 # "concat" is zero, unless zip was concatenated to another file
658 concat = x - offset_cd
659 if self.debug > 2:
660 print "given, inferred, offset", offset_cd, x, concat
661 # self.start_dir: Position of start of central directory
662 self.start_dir = offset_cd + concat
663 fp.seek(self.start_dir, 0)
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000664 data = fp.read(size_cd)
665 fp = cStringIO.StringIO(data)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000666 total = 0
667 while total < size_cd:
668 centdir = fp.read(46)
669 total = total + 46
670 if centdir[0:4] != stringCentralDir:
671 raise BadZipfile, "Bad magic number for central directory"
672 centdir = struct.unpack(structCentralDir, centdir)
673 if self.debug > 2:
674 print centdir
Fred Drake3e038e52001-02-28 17:56:26 +0000675 filename = fp.read(centdir[_CD_FILENAME_LENGTH])
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000676 # Create ZipInfo instance to store file information
677 x = ZipInfo(filename)
Fred Drake3e038e52001-02-28 17:56:26 +0000678 x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
679 x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
680 total = (total + centdir[_CD_FILENAME_LENGTH]
681 + centdir[_CD_EXTRA_FIELD_LENGTH]
682 + centdir[_CD_COMMENT_LENGTH])
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000683 x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000684 (x.create_version, x.create_system, x.extract_version, x.reserved,
685 x.flag_bits, x.compress_type, t, d,
686 x.CRC, x.compress_size, x.file_size) = centdir[1:12]
687 x.volume, x.internal_attr, x.external_attr = centdir[15:18]
688 # Convert date/time code to (year, month, day, hour, min, sec)
Gregory P. Smith0c63fc22008-01-20 01:21:03 +0000689 x._raw_time = t
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000690 x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
Fred Drake414ca662000-06-13 18:49:53 +0000691 t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000692
693 x._decodeExtra()
694 x.header_offset = x.header_offset + concat
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000695 self.filelist.append(x)
696 self.NameToInfo[x.filename] = x
697 if self.debug > 2:
698 print "total", total
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000699
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000700
701 def namelist(self):
Fred Drake484d7352000-10-02 21:14:52 +0000702 """Return a list of file names in the archive."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000703 l = []
704 for data in self.filelist:
705 l.append(data.filename)
706 return l
707
708 def infolist(self):
Fred Drake484d7352000-10-02 21:14:52 +0000709 """Return a list of class ZipInfo instances for files in the
710 archive."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000711 return self.filelist
712
713 def printdir(self):
Fred Drake484d7352000-10-02 21:14:52 +0000714 """Print a table of contents for the zip file."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000715 print "%-46s %19s %12s" % ("File Name", "Modified ", "Size")
716 for zinfo in self.filelist:
Raymond Hettinger351e1a32008-01-14 22:58:05 +0000717 date = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time[:6]
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000718 print "%-46s %s %12d" % (zinfo.filename, date, zinfo.file_size)
719
720 def testzip(self):
Fred Drake484d7352000-10-02 21:14:52 +0000721 """Read all the files and check the CRC."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000722 for zinfo in self.filelist:
723 try:
Tim Peterse1190062001-01-15 03:34:38 +0000724 self.read(zinfo.filename) # Check CRC-32
Raymond Hettingerc0fac962003-06-27 22:25:03 +0000725 except BadZipfile:
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000726 return zinfo.filename
727
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000728
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000729 def getinfo(self, name):
Fred Drake484d7352000-10-02 21:14:52 +0000730 """Return the instance of ZipInfo given 'name'."""
Georg Brandl4b3ab6f2007-07-12 09:59:22 +0000731 info = self.NameToInfo.get(name)
732 if info is None:
733 raise KeyError(
734 'There is no item named %r in the archive' % name)
735
736 return info
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000737
Martin v. Löwisc6d626e2007-02-13 09:49:38 +0000738 def setpassword(self, pwd):
739 """Set default password for encrypted files."""
740 self.pwd = pwd
741
742 def read(self, name, pwd=None):
Fred Drake484d7352000-10-02 21:14:52 +0000743 """Return file bytes (as a string) for name."""
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000744 return self.open(name, "r", pwd).read()
745
746 def open(self, name, mode="r", pwd=None):
747 """Return file-like object for 'name'."""
748 if mode not in ("r", "U", "rU"):
749 raise RuntimeError, 'open() requires mode "r", "U", or "rU"'
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000750 if not self.fp:
751 raise RuntimeError, \
Fred Drake5db246d2000-09-29 20:44:48 +0000752 "Attempt to read ZIP archive that was already closed"
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000753
Tim Petersea5962f2007-03-12 18:07:52 +0000754 # Only open a new file for instances where we were not
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000755 # given a file object in the constructor
756 if self._filePassed:
757 zef_file = self.fp
758 else:
759 zef_file = open(self.filename, 'rb')
760
761 # Get info object for name
762 zinfo = self.getinfo(name)
763
764 filepos = zef_file.tell()
765
766 zef_file.seek(zinfo.header_offset, 0)
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000767
768 # Skip the file header:
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000769 fheader = zef_file.read(30)
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000770 if fheader[0:4] != stringFileHeader:
771 raise BadZipfile, "Bad magic number for file header"
772
773 fheader = struct.unpack(structFileHeader, fheader)
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000774 fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000775 if fheader[_FH_EXTRA_FIELD_LENGTH]:
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000776 zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000777
778 if fname != zinfo.orig_filename:
779 raise BadZipfile, \
780 'File name in directory "%s" and header "%s" differ.' % (
781 zinfo.orig_filename, fname)
782
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000783 # check for encrypted flag & handle password
784 is_encrypted = zinfo.flag_bits & 0x1
785 zd = None
Martin v. Löwisc6d626e2007-02-13 09:49:38 +0000786 if is_encrypted:
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000787 if not pwd:
788 pwd = self.pwd
789 if not pwd:
790 raise RuntimeError, "File %s is encrypted, " \
791 "password required for extraction" % name
792
Martin v. Löwisc6d626e2007-02-13 09:49:38 +0000793 zd = _ZipDecrypter(pwd)
794 # The first 12 bytes in the cypher stream is an encryption header
795 # used to strengthen the algorithm. The first 11 bytes are
796 # completely random, while the 12th contains the MSB of the CRC,
Gregory P. Smith0c63fc22008-01-20 01:21:03 +0000797 # or the MSB of the file time depending on the header type
Martin v. Löwisc6d626e2007-02-13 09:49:38 +0000798 # and is used to check the correctness of the password.
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000799 bytes = zef_file.read(12)
Martin v. Löwisc6d626e2007-02-13 09:49:38 +0000800 h = map(zd, bytes[0:12])
Gregory P. Smith0c63fc22008-01-20 01:21:03 +0000801 if zinfo.flag_bits & 0x8:
802 # compare against the file type from extended local headers
803 check_byte = (zinfo._raw_time >> 8) & 0xff
804 else:
805 # compare against the CRC otherwise
806 check_byte = (zinfo.CRC >> 24) & 0xff
807 if ord(h[11]) != check_byte:
808 raise RuntimeError("Bad password for file", name)
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000809
810 # build and return a ZipExtFile
811 if zd is None:
812 zef = ZipExtFile(zef_file, zinfo)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000813 else:
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000814 zef = ZipExtFile(zef_file, zinfo, zd)
815
816 # set universal newlines on ZipExtFile if necessary
817 if "U" in mode:
818 zef.set_univ_newlines(True)
819 return zef
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000820
Georg Brandl62416bc2008-01-07 18:47:44 +0000821 def extract(self, member, path=None, pwd=None):
822 """Extract a member from the archive to the current working directory,
823 using its full name. Its file information is extracted as accurately
824 as possible. `member' may be a filename or a ZipInfo object. You can
825 specify a different directory using `path'.
826 """
827 if not isinstance(member, ZipInfo):
828 member = self.getinfo(member)
829
830 if path is None:
831 path = os.getcwd()
832
833 return self._extract_member(member, path, pwd)
834
835 def extractall(self, path=None, members=None, pwd=None):
836 """Extract all members from the archive to the current working
837 directory. `path' specifies a different directory to extract to.
838 `members' is optional and must be a subset of the list returned
839 by namelist().
840 """
841 if members is None:
842 members = self.namelist()
843
844 for zipinfo in members:
845 self.extract(zipinfo, path, pwd)
846
847 def _extract_member(self, member, targetpath, pwd):
848 """Extract the ZipInfo object 'member' to a physical
849 file on the path targetpath.
850 """
851 # build the destination pathname, replacing
852 # forward slashes to platform specific separators.
853 if targetpath[-1:] == "/":
854 targetpath = targetpath[:-1]
855
856 # don't include leading "/" from file name if present
857 if os.path.isabs(member.filename):
858 targetpath = os.path.join(targetpath, member.filename[1:])
859 else:
860 targetpath = os.path.join(targetpath, member.filename)
861
862 targetpath = os.path.normpath(targetpath)
863
864 # Create all upper directories if necessary.
865 upperdirs = os.path.dirname(targetpath)
866 if upperdirs and not os.path.exists(upperdirs):
867 os.makedirs(upperdirs)
868
869 source = self.open(member.filename, pwd=pwd)
870 target = file(targetpath, "wb")
871 shutil.copyfileobj(source, target)
872 source.close()
873 target.close()
874
875 return targetpath
876
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000877 def _writecheck(self, zinfo):
Fred Drake484d7352000-10-02 21:14:52 +0000878 """Check for errors before writing a file to the archive."""
Raymond Hettinger54f02222002-06-01 14:18:47 +0000879 if zinfo.filename in self.NameToInfo:
Tim Peterse1190062001-01-15 03:34:38 +0000880 if self.debug: # Warning for duplicate names
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000881 print "Duplicate name:", zinfo.filename
882 if self.mode not in ("w", "a"):
883 raise RuntimeError, 'write() requires mode "w" or "a"'
884 if not self.fp:
885 raise RuntimeError, \
Fred Drake5db246d2000-09-29 20:44:48 +0000886 "Attempt to write ZIP archive that was already closed"
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000887 if zinfo.compress_type == ZIP_DEFLATED and not zlib:
888 raise RuntimeError, \
Fred Drake5db246d2000-09-29 20:44:48 +0000889 "Compression requires the (missing) zlib module"
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000890 if zinfo.compress_type not in (ZIP_STORED, ZIP_DEFLATED):
891 raise RuntimeError, \
Fred Drake5db246d2000-09-29 20:44:48 +0000892 "That compression method is not supported"
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000893 if zinfo.file_size > ZIP64_LIMIT:
894 if not self._allowZip64:
895 raise LargeZipFile("Filesize would require ZIP64 extensions")
896 if zinfo.header_offset > ZIP64_LIMIT:
897 if not self._allowZip64:
898 raise LargeZipFile("Zipfile size would require ZIP64 extensions")
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000899
900 def write(self, filename, arcname=None, compress_type=None):
Fred Drake484d7352000-10-02 21:14:52 +0000901 """Put the bytes from filename into the archive under the name
902 arcname."""
Georg Brandl4b3ab6f2007-07-12 09:59:22 +0000903 if not self.fp:
904 raise RuntimeError(
905 "Attempt to write to ZIP archive that was already closed")
906
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000907 st = os.stat(filename)
Raymond Hettinger32200ae2002-06-01 19:51:15 +0000908 mtime = time.localtime(st.st_mtime)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000909 date_time = mtime[0:6]
910 # Create ZipInfo instance to store file information
911 if arcname is None:
Georg Brandl8f7c54e2006-02-20 08:40:38 +0000912 arcname = filename
913 arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
914 while arcname[0] in (os.sep, os.altsep):
915 arcname = arcname[1:]
916 zinfo = ZipInfo(arcname, date_time)
Andrew M. Kuchling55430212004-07-10 15:40:29 +0000917 zinfo.external_attr = (st[0] & 0xFFFF) << 16L # Unix attributes
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000918 if compress_type is None:
Tim Peterse1190062001-01-15 03:34:38 +0000919 zinfo.compress_type = self.compression
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000920 else:
Tim Peterse1190062001-01-15 03:34:38 +0000921 zinfo.compress_type = compress_type
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000922
923 zinfo.file_size = st.st_size
Finn Bock03a3bb82001-09-05 18:40:33 +0000924 zinfo.flag_bits = 0x00
Tim Peterse1190062001-01-15 03:34:38 +0000925 zinfo.header_offset = self.fp.tell() # Start of header bytes
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000926
927 self._writecheck(zinfo)
928 self._didModify = True
929 fp = open(filename, "rb")
Finn Bock03a3bb82001-09-05 18:40:33 +0000930 # Must overwrite CRC and sizes with correct data later
931 zinfo.CRC = CRC = 0
932 zinfo.compress_size = compress_size = 0
933 zinfo.file_size = file_size = 0
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000934 self.fp.write(zinfo.FileHeader())
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000935 if zinfo.compress_type == ZIP_DEFLATED:
936 cmpr = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
937 zlib.DEFLATED, -15)
938 else:
939 cmpr = None
940 while 1:
941 buf = fp.read(1024 * 8)
942 if not buf:
943 break
944 file_size = file_size + len(buf)
Gregory P. Smithb89a0962008-03-19 01:46:10 +0000945 CRC = crc32(buf, CRC)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000946 if cmpr:
947 buf = cmpr.compress(buf)
948 compress_size = compress_size + len(buf)
949 self.fp.write(buf)
950 fp.close()
951 if cmpr:
952 buf = cmpr.flush()
953 compress_size = compress_size + len(buf)
954 self.fp.write(buf)
955 zinfo.compress_size = compress_size
956 else:
957 zinfo.compress_size = file_size
958 zinfo.CRC = CRC
959 zinfo.file_size = file_size
Finn Bock03a3bb82001-09-05 18:40:33 +0000960 # Seek backwards and write CRC and file sizes
Tim Petersb64bec32001-09-18 02:26:39 +0000961 position = self.fp.tell() # Preserve current position in file
Finn Bock03a3bb82001-09-05 18:40:33 +0000962 self.fp.seek(zinfo.header_offset + 14, 0)
Brett Cannonff450f72004-07-10 19:09:20 +0000963 self.fp.write(struct.pack("<lLL", zinfo.CRC, zinfo.compress_size,
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000964 zinfo.file_size))
Finn Bock03a3bb82001-09-05 18:40:33 +0000965 self.fp.seek(position, 0)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000966 self.filelist.append(zinfo)
967 self.NameToInfo[zinfo.filename] = zinfo
968
Just van Rossumb083cb32002-12-12 12:23:32 +0000969 def writestr(self, zinfo_or_arcname, bytes):
Fred Drake484d7352000-10-02 21:14:52 +0000970 """Write a file into the archive. The contents is the string
Just van Rossumb083cb32002-12-12 12:23:32 +0000971 'bytes'. 'zinfo_or_arcname' is either a ZipInfo instance or
972 the name of the file in the archive."""
973 if not isinstance(zinfo_or_arcname, ZipInfo):
974 zinfo = ZipInfo(filename=zinfo_or_arcname,
Raymond Hettinger351e1a32008-01-14 22:58:05 +0000975 date_time=time.localtime(time.time())[:6])
Just van Rossumb083cb32002-12-12 12:23:32 +0000976 zinfo.compress_type = self.compression
977 else:
978 zinfo = zinfo_or_arcname
Georg Brandl4b3ab6f2007-07-12 09:59:22 +0000979
980 if not self.fp:
981 raise RuntimeError(
982 "Attempt to write to ZIP archive that was already closed")
983
Tim Peterse1190062001-01-15 03:34:38 +0000984 zinfo.file_size = len(bytes) # Uncompressed size
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000985 zinfo.header_offset = self.fp.tell() # Start of header bytes
986 self._writecheck(zinfo)
987 self._didModify = True
Gregory P. Smithb89a0962008-03-19 01:46:10 +0000988 zinfo.CRC = crc32(bytes) # CRC-32 checksum
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000989 if zinfo.compress_type == ZIP_DEFLATED:
990 co = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
991 zlib.DEFLATED, -15)
992 bytes = co.compress(bytes) + co.flush()
Tim Peterse1190062001-01-15 03:34:38 +0000993 zinfo.compress_size = len(bytes) # Compressed size
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000994 else:
995 zinfo.compress_size = zinfo.file_size
Tim Peterse1190062001-01-15 03:34:38 +0000996 zinfo.header_offset = self.fp.tell() # Start of header bytes
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000997 self.fp.write(zinfo.FileHeader())
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000998 self.fp.write(bytes)
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000999 self.fp.flush()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001000 if zinfo.flag_bits & 0x08:
Tim Peterse1190062001-01-15 03:34:38 +00001001 # Write CRC and file sizes after the file data
Brett Cannonff450f72004-07-10 19:09:20 +00001002 self.fp.write(struct.pack("<lLL", zinfo.CRC, zinfo.compress_size,
Tim Peterse1190062001-01-15 03:34:38 +00001003 zinfo.file_size))
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001004 self.filelist.append(zinfo)
1005 self.NameToInfo[zinfo.filename] = zinfo
1006
1007 def __del__(self):
Fred Drake484d7352000-10-02 21:14:52 +00001008 """Call the "close()" method in case the user forgot."""
Tim Petersd15f8bb2001-11-28 23:16:40 +00001009 self.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001010
1011 def close(self):
Fred Drake484d7352000-10-02 21:14:52 +00001012 """Close the file, and for mode "w" and "a" write the ending
1013 records."""
Tim Petersd15f8bb2001-11-28 23:16:40 +00001014 if self.fp is None:
1015 return
Tim Petersa608bb22006-06-15 18:06:29 +00001016
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001017 if self.mode in ("w", "a") and self._didModify: # write ending records
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001018 count = 0
1019 pos1 = self.fp.tell()
Tim Peterse1190062001-01-15 03:34:38 +00001020 for zinfo in self.filelist: # write central directory
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001021 count = count + 1
1022 dt = zinfo.date_time
1023 dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
Tim Peters3caca232001-12-06 06:23:26 +00001024 dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001025 extra = []
1026 if zinfo.file_size > ZIP64_LIMIT \
1027 or zinfo.compress_size > ZIP64_LIMIT:
1028 extra.append(zinfo.file_size)
1029 extra.append(zinfo.compress_size)
1030 file_size = 0xffffffff #-1
1031 compress_size = 0xffffffff #-1
1032 else:
1033 file_size = zinfo.file_size
1034 compress_size = zinfo.compress_size
1035
1036 if zinfo.header_offset > ZIP64_LIMIT:
1037 extra.append(zinfo.header_offset)
Tim Petersf79c32d2006-07-31 02:53:03 +00001038 header_offset = -1 # struct "l" format: 32 one bits
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001039 else:
1040 header_offset = zinfo.header_offset
1041
1042 extra_data = zinfo.extra
1043 if extra:
1044 # Append a ZIP64 field to the extra's
1045 extra_data = struct.pack(
Gregory P. Smithb89a0962008-03-19 01:46:10 +00001046 '<HH' + 'Q'*len(extra),
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001047 1, 8*len(extra), *extra) + extra_data
Tim Petersa608bb22006-06-15 18:06:29 +00001048
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001049 extract_version = max(45, zinfo.extract_version)
1050 create_version = max(45, zinfo.create_version)
1051 else:
1052 extract_version = zinfo.extract_version
1053 create_version = zinfo.create_version
1054
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001055 centdir = struct.pack(structCentralDir,
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001056 stringCentralDir, create_version,
1057 zinfo.create_system, extract_version, zinfo.reserved,
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001058 zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001059 zinfo.CRC, compress_size, file_size,
1060 len(zinfo.filename), len(extra_data), len(zinfo.comment),
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001061 0, zinfo.internal_attr, zinfo.external_attr,
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001062 header_offset)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001063 self.fp.write(centdir)
1064 self.fp.write(zinfo.filename)
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001065 self.fp.write(extra_data)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001066 self.fp.write(zinfo.comment)
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001067
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001068 pos2 = self.fp.tell()
1069 # Write end-of-zip-archive record
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001070 if pos1 > ZIP64_LIMIT:
1071 # Need to write the ZIP64 end-of-archive records
1072 zip64endrec = struct.pack(
1073 structEndArchive64, stringEndArchive64,
1074 44, 45, 45, 0, 0, count, count, pos2 - pos1, pos1)
1075 self.fp.write(zip64endrec)
1076
1077 zip64locrec = struct.pack(
Tim Petersa608bb22006-06-15 18:06:29 +00001078 structEndArchive64Locator,
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001079 stringEndArchive64Locator, 0, pos2, 1)
1080 self.fp.write(zip64locrec)
1081
Tim Peters352bf0d2006-07-31 02:40:23 +00001082 # XXX Why is `pos3` computed next? It's never referenced.
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001083 pos3 = self.fp.tell()
1084 endrec = struct.pack(structEndArchive, stringEndArchive,
Tim Peters352bf0d2006-07-31 02:40:23 +00001085 0, 0, count, count, pos2 - pos1, -1, 0)
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001086 self.fp.write(endrec)
1087
1088 else:
1089 endrec = struct.pack(structEndArchive, stringEndArchive,
1090 0, 0, count, count, pos2 - pos1, pos1, 0)
1091 self.fp.write(endrec)
Guido van Rossumf85af612001-04-14 16:45:14 +00001092 self.fp.flush()
Fred Drake3d9091e2001-03-26 15:49:24 +00001093 if not self._filePassed:
1094 self.fp.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001095 self.fp = None
1096
1097
class PyZipFile(ZipFile):
    """ZipFile variant that adds Python modules and packages to an
    archive in compiled (.pyc / .pyo) form."""

    def writepy(self, pathname, basename = ""):
        """Add the Python code found at "pathname" to the ZIP archive.

        A package directory (one containing __init__.py) is added under
        basename/<directory name>, together with every *.py module in it
        and, recursively, every sub-package.  A plain directory has its
        *.py modules added directly under basename.  Anything else must
        be a single *.py file, otherwise RuntimeError is raised.  What is
        stored is always the module.pyo or module.pyc file chosen by
        _get_codename(), which compiles module.py when necessary.
        """
        name = os.path.split(pathname)[1]

        if not os.path.isdir(pathname):
            # A single module file.
            if pathname[-3:] != ".py":
                raise RuntimeError(
                      'Files added with writepy() must end with ".py"')
            fname, arcname = self._get_codename(pathname[0:-3], basename)
            if self.debug:
                print("Adding file %s" % (arcname,))
            self.write(fname, arcname)
            return

        initname = os.path.join(pathname, "__init__.py")
        if not os.path.isfile(initname):
            # This is NOT a package directory, add its files at top level
            if self.debug:
                print("Adding files from directory %s" % (pathname,))
            for filename in os.listdir(pathname):
                if os.path.splitext(filename)[1] != ".py":
                    continue
                path = os.path.join(pathname, filename)
                fname, arcname = self._get_codename(path[0:-3], basename)
                if self.debug:
                    print("Adding %s" % (arcname,))
                self.write(fname, arcname)
            return

        # This is a package directory, add it
        if basename:
            basename = "%s/%s" % (basename, name)
        else:
            basename = name
        if self.debug:
            print("Adding package in %s as %s" % (pathname, basename))
        fname, arcname = self._get_codename(initname[0:-3], basename)
        if self.debug:
            print("Adding %s" % (arcname,))
        self.write(fname, arcname)

        # Add all *.py files and package subdirectories
        entries = os.listdir(pathname)
        entries.remove("__init__.py")       # already added above
        for filename in entries:
            path = os.path.join(pathname, filename)
            if os.path.isdir(path):
                if os.path.isfile(os.path.join(path, "__init__.py")):
                    # This is a package directory, add it
                    self.writepy(path, basename)  # Recursive call
                continue
            if os.path.splitext(filename)[1] == ".py":
                fname, arcname = self._get_codename(path[0:-3], basename)
                if self.debug:
                    print("Adding %s" % (arcname,))
                self.write(fname, arcname)

    def _get_codename(self, pathname, basename):
        """Return (filename, archivename) for the module path "pathname".

        "pathname" is the module path without its extension.  An existing
        .pyo file at least as new as the .py source is preferred.
        Otherwise the .pyc file is used, after compiling it when it is
        missing or older than the source; a compile error is printed and
        not raised.  The archive name is the chosen file's base name,
        prefixed with "basename/" when basename is non-empty.  For
        example, given /python/lib/string the result is
        (/python/lib/string.pyc, string.pyc).
        """
        file_py  = pathname + ".py"
        file_pyc = pathname + ".pyc"
        file_pyo = pathname + ".pyo"

        def modified(path):
            return os.stat(path).st_mtime

        if os.path.isfile(file_pyo) and \
                            modified(file_pyo) >= modified(file_py):
            fname = file_pyo    # Use .pyo  file
        else:
            fname = file_pyc
            if not os.path.isfile(file_pyc) or \
                            modified(file_pyc) < modified(file_py):
                import py_compile
                if self.debug:
                    print("Compiling %s" % (file_py,))
                try:
                    py_compile.compile(file_py, file_pyc, None, True)
                except py_compile.PyCompileError:
                    print(sys.exc_info()[1].msg)

        archivename = os.path.split(fname)[1]
        if basename:
            archivename = "%s/%s" % (basename, archivename)
        return (fname, archivename)
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001195
1196
def main(args = None):
    """Command line interface: list, test, extract or create a zipfile.

    'args' is the argument list and defaults to sys.argv[1:].  The first
    argument selects the action (-l, -t, -e or -c).  An unknown action
    or a wrong number of arguments prints the usage text and exits with
    status 1.  The archive is closed again even when the action fails.
    """
    import textwrap
    USAGE=textwrap.dedent("""\
        Usage:
            zipfile.py -l zipfile.zip # Show listing of a zipfile
            zipfile.py -t zipfile.zip # Test if a zipfile is valid
            zipfile.py -e zipfile.zip target # Extract zipfile into target dir
            zipfile.py -c zipfile.zip src ... # Create zipfile from sources
        """)
    if args is None:
        args = sys.argv[1:]

    if not args or args[0] not in ('-l', '-c', '-e', '-t'):
        print(USAGE)
        sys.exit(1)

    if args[0] == '-l':
        if len(args) != 2:
            print(USAGE)
            sys.exit(1)
        zf = ZipFile(args[1], 'r')
        try:
            zf.printdir()
        finally:
            zf.close()

    elif args[0] == '-t':
        if len(args) != 2:
            print(USAGE)
            sys.exit(1)
        zf = ZipFile(args[1], 'r')
        try:
            badfile = zf.testzip()
        finally:
            zf.close()
        # testzip() returns the first corrupt member, or None.
        if badfile is not None:
            print("The following enclosed file is corrupted: %r" % (badfile,))
        print("Done testing")

    elif args[0] == '-e':
        if len(args) != 3:
            print(USAGE)
            sys.exit(1)

        zf = ZipFile(args[1], 'r')
        try:
            out = args[2]
            for path in zf.namelist():
                if path.startswith('./'):
                    tgt = os.path.join(out, path[2:])
                else:
                    tgt = os.path.join(out, path)

                # dirname() is '' for a bare file name in the current
                # directory, and makedirs('') would fail.
                tgtdir = os.path.dirname(tgt)
                if tgtdir and not os.path.exists(tgtdir):
                    os.makedirs(tgtdir)
                if path.endswith('/'):
                    # Directory entry: created above, no data to write.
                    continue
                fp = open(tgt, 'wb')
                try:
                    fp.write(zf.read(path))
                finally:
                    fp.close()
        finally:
            zf.close()

    elif args[0] == '-c':
        if len(args) < 3:
            print(USAGE)
            sys.exit(1)

        def addToZip(zf, path, zippath):
            # Add a file, or recurse into a directory tree.
            if os.path.isfile(path):
                zf.write(path, zippath, ZIP_DEFLATED)
            elif os.path.isdir(path):
                for nm in os.listdir(path):
                    addToZip(zf,
                            os.path.join(path, nm), os.path.join(zippath, nm))
            # else: ignore

        zf = ZipFile(args[1], 'w', allowZip64=True)
        try:
            for src in args[2:]:
                addToZip(zf, src, os.path.basename(src))
        finally:
            zf.close()

if __name__ == "__main__":
    main()