blob: 9735a6a59f2fe3ffc72b418cbc2accbc9d40ba32 [file] [log] [blame]
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001"""
2Read and write ZIP files.
3"""
Georg Brandl62416bc2008-01-07 18:47:44 +00004import struct, os, time, sys, shutil
Ronald Oussoren143cefb2006-06-15 08:14:18 +00005import binascii, cStringIO
Guido van Rossum32abe6f2000-03-31 17:30:02 +00006
7try:
Tim Peterse1190062001-01-15 03:34:38 +00008 import zlib # We may need its compression method
Gregory P. Smithb89a0962008-03-19 01:46:10 +00009 crc32 = zlib.crc32
Guido van Rossum9c673f32001-04-10 15:37:12 +000010except ImportError:
Guido van Rossum32abe6f2000-03-31 17:30:02 +000011 zlib = None
Gregory P. Smithb89a0962008-03-19 01:46:10 +000012 crc32 = binascii.crc32
Guido van Rossum32abe6f2000-03-31 17:30:02 +000013
# Names exported by a star-import of this module.
__all__ = ["BadZipfile", "error", "ZIP_STORED", "ZIP_DEFLATED", "is_zipfile",
           "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile" ]
Skip Montanaro40fc1602001-03-01 04:27:19 +000016
class BadZipfile(Exception):
    """Raised by this module when a file cannot be read as a valid ZIP archive."""
Ronald Oussoren143cefb2006-06-15 08:14:18 +000019
20
class LargeZipFile(Exception):
    """Signal that an archive being written needs ZIP64 extensions.

    Raised when writing a zipfile that requires the ZIP64 extensions
    while those extensions are disabled.
    """
26
error = BadZipfile      # The exception raised by this module

# Largest size/offset handled without ZIP64; values above this (2**31 - 1)
# take the ZIP64 code paths in this module.
ZIP64_LIMIT= (1 << 31) - 1

# constants for Zip file compression methods
ZIP_STORED = 0
ZIP_DEFLATED = 8
# Other ZIP compression methods not supported

# Here are some struct module formats for reading headers.
# All formats are little-endian with standard sizes ("<" prefix).
structEndArchive = "<4s4H2LH"     # 8 items, end of archive, 22 bytes
stringEndArchive = "PK\005\006"   # magic number for end of archive record
structCentralDir = "<4s4B4HLLL5HLL"# 19 items, central directory, 46 bytes
stringCentralDir = "PK\001\002"   # magic number for central directory
structFileHeader = "<4s2B4HLLL2H"  # 12 items, file header record, 30 bytes
stringFileHeader = "PK\003\004"   # magic number for file header
structEndArchive64Locator = "<4sLQL" # 4 items, locate Zip64 header, 20 bytes
stringEndArchive64Locator = "PK\x06\x07" # magic token for locator header
structEndArchive64 = "<4sQHHLLQQQQ" # 10 items, end of archive (Zip64), 56 bytes
stringEndArchive64 = "PK\x06\x06" # magic token for Zip64 header
47
Guido van Rossum32abe6f2000-03-31 17:30:02 +000048
# indexes of entries in the central directory structure
# (positions in the tuple produced by unpacking structCentralDir)
_CD_SIGNATURE = 0
_CD_CREATE_VERSION = 1
_CD_CREATE_SYSTEM = 2
_CD_EXTRACT_VERSION = 3
_CD_EXTRACT_SYSTEM = 4                  # is this meaningful?
_CD_FLAG_BITS = 5
_CD_COMPRESS_TYPE = 6
_CD_TIME = 7
_CD_DATE = 8
_CD_CRC = 9
_CD_COMPRESSED_SIZE = 10
_CD_UNCOMPRESSED_SIZE = 11
_CD_FILENAME_LENGTH = 12
_CD_EXTRA_FIELD_LENGTH = 13
_CD_COMMENT_LENGTH = 14
_CD_DISK_NUMBER_START = 15
_CD_INTERNAL_FILE_ATTRIBUTES = 16
_CD_EXTERNAL_FILE_ATTRIBUTES = 17
_CD_LOCAL_HEADER_OFFSET = 18

# indexes of entries in the local file header structure
# (presumably positions in the tuple unpacked with structFileHeader;
# the twelve indexes match that format's twelve items)
_FH_SIGNATURE = 0
_FH_EXTRACT_VERSION = 1
_FH_EXTRACT_SYSTEM = 2                  # is this meaningful?
_FH_GENERAL_PURPOSE_FLAG_BITS = 3
_FH_COMPRESSION_METHOD = 4
_FH_LAST_MOD_TIME = 5
_FH_LAST_MOD_DATE = 6
_FH_CRC = 7
_FH_COMPRESSED_SIZE = 8
_FH_UNCOMPRESSED_SIZE = 9
_FH_FILENAME_LENGTH = 10
_FH_EXTRA_FIELD_LENGTH = 11
83
def is_zipfile(filename):
    """Quickly see if file is a ZIP file by checking the magic number.

    filename is the path of the file to inspect.  Returns True when an
    "End of Central Directory" record can be located in the file and
    False otherwise, including when the file cannot be opened or read
    (IOError is swallowed on purpose: this is a best-effort probe).
    """
    try:
        fpin = open(filename, "rb")
        try:
            endrec = _EndRecData(fpin)
        finally:
            # Always release the handle, even when probing the file raised.
            fpin.close()
        if endrec:
            return True                 # file has correct magic number
    except IOError:
        pass
    return False
Guido van Rossum32abe6f2000-03-31 17:30:02 +000095
def _EndRecData64(fpin, offset, endrec):
    """
    Read the ZIP64 end-of-archive records and use that to update endrec

    fpin is the open archive, offset is the position of the classic
    end-of-archive record relative to the end of the file (a negative
    number), and endrec is the list built from that classic record.

    Returns endrec.  It is returned unmodified when no ZIP64 locator or
    ZIP64 end-of-archive record is found in front of the classic record
    (including when the file is too short to hold them); otherwise items
    1-6 are replaced with the values from the ZIP64 record.

    Raises BadZipfile for archives that span multiple disks.
    """
    locatorSize = struct.calcsize(structEndArchive64Locator)
    try:
        fpin.seek(offset - locatorSize, 2)
    except IOError:
        # The file is too small to contain a ZIP64 locator in front of the
        # classic record, so there is nothing to merge.
        return endrec
    data = fpin.read(locatorSize)
    if len(data) != locatorSize:
        return endrec
    sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, data)
    if sig != stringEndArchive64Locator:
        return endrec

    if diskno != 0 or disks != 1:
        raise BadZipfile("zipfiles that span multiple disks are not supported")

    # Assume no 'zip64 extensible data'
    endArchiveSize = struct.calcsize(structEndArchive64)
    fpin.seek(offset - locatorSize - endArchiveSize, 2)
    data = fpin.read(endArchiveSize)
    if len(data) != endArchiveSize:
        # Short read: not enough bytes for a complete ZIP64 record.
        return endrec
    sig, sz, create_version, read_version, disk_num, disk_dir, \
            dircount, dircount2, dirsize, diroffset = \
            struct.unpack(structEndArchive64, data)
    if sig != stringEndArchive64:
        return endrec

    # Update the original endrec using data from the ZIP64 record
    endrec[1] = disk_num
    endrec[2] = disk_dir
    endrec[3] = dircount
    endrec[4] = dircount2
    endrec[5] = dirsize
    endrec[6] = diroffset
    return endrec
128
129
def _EndRecData(fpin):
    """Return data from the "End of Central Directory" record, or None.

    The data is a list of the eight items unpacked from the ZIP "End of
    central dir" record, followed by the archive comment (index 8) and the
    file seek offset of this record (index 9).  When the central directory
    offset field holds 0xffffffff the list is passed through
    _EndRecData64() so ZIP64 values can replace the classic ones.

    None is returned when the file is too short to hold a record or when
    no record with a consistent comment length can be found.
    """
    recSize = struct.calcsize(structEndArchive)

    # Determine the file size first so that short files are reported as
    # "not a zip file" instead of failing in seek().
    fpin.seek(0, 2)
    filesize = fpin.tell()
    if filesize < recSize:
        return None

    fpin.seek(-recSize, 2)              # Assume no archive comment.
    data = fpin.read()
    if data[0:4] == stringEndArchive and data[-2:] == "\000\000":
        endrec = struct.unpack(structEndArchive, data)
        endrec = list(endrec)
        endrec.append("")               # Append the archive comment
        endrec.append(filesize - recSize)   # Append the record start offset
        if endrec[-4] == 0xffffffff:
            return _EndRecData64(fpin, -recSize, endrec)
        return endrec
    # Search the last END_BLOCK bytes of the file for the record signature.
    # The comment is appended to the ZIP file and has a 16 bit length, so
    # the record can start at most 65535 + recSize bytes before the end of
    # the file.  The signature must not appear in the comment.
    END_BLOCK = min(filesize, (1 << 16) - 1 + recSize)
    fpin.seek(filesize - END_BLOCK, 0)
    data = fpin.read()
    start = data.rfind(stringEndArchive)
    # Require room for a complete record after the signature; a truncated
    # match cannot be unpacked.
    if start >= 0 and start + recSize <= len(data):
        endrec = struct.unpack(structEndArchive, data[start:start+recSize])
        endrec = list(endrec)
        comment = data[start+recSize:]
        if endrec[7] == len(comment):   # Comment length checks out
            # Append the archive comment and start offset
            endrec.append(comment)
            endrec.append(filesize - END_BLOCK + start)
            if endrec[-4] == 0xffffffff:
                return _EndRecData64(fpin, - END_BLOCK + start, endrec)
            return endrec
    return None                         # Error, return None
167
Fred Drake484d7352000-10-02 21:14:52 +0000168
class ZipInfo (object):
    """Class with attributes describing each file in the ZIP archive."""

    __slots__ = (
            'orig_filename',
            'filename',
            'date_time',
            'compress_type',
            'comment',
            'extra',
            'create_system',
            'create_version',
            'extract_version',
            'reserved',
            'flag_bits',
            'volume',
            'internal_attr',
            'external_attr',
            'header_offset',
            'CRC',
            'compress_size',
            'file_size',
            '_raw_time',
        )

    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
        """Create an entry for filename with the given modification time.

        filename is cut at the first null byte and os.sep is converted to
        "/"; the unmodified value is kept in orig_filename.  date_time is
        a (year, month, day, hour, min, sec) sequence.
        """
        self.orig_filename = filename   # Original file name in archive

        # Terminate the file name at the first null byte.  Null bytes in file
        # names are used as tricks by viruses in archives.
        null_byte = filename.find(chr(0))
        if null_byte >= 0:
            filename = filename[0:null_byte]
        # This is used to ensure paths in generated ZIP files always use
        # forward slashes as the directory separator, as required by the
        # ZIP format specification.
        if os.sep != "/" and os.sep in filename:
            filename = filename.replace(os.sep, "/")

        self.filename = filename        # Normalized file name
        self.date_time = date_time      # year, month, day, hour, min, sec
        # Standard values:
        self.compress_type = ZIP_STORED # Type of compression for the file
        self.comment = ""               # Comment for each file
        self.extra = ""                 # ZIP extra data
        if sys.platform == 'win32':
            self.create_system = 0          # System which created ZIP archive
        else:
            # Assume everything else is unix-y
            self.create_system = 3          # System which created ZIP archive
        self.create_version = 20        # Version which created ZIP archive
        self.extract_version = 20       # Version needed to extract archive
        self.reserved = 0               # Must be zero
        self.flag_bits = 0              # ZIP flag bits
        self.volume = 0                 # Volume number of file header
        self.internal_attr = 0          # Internal attributes
        self.external_attr = 0          # External file attributes
        # Other attributes are set by class ZipFile:
        # header_offset         Byte offset to the file header
        # CRC                   CRC-32 of the uncompressed file
        # compress_size         Size of the compressed file
        # file_size             Size of the uncompressed file

    def FileHeader(self):
        """Return the per-file header as a string.

        When flag bit 0x08 is set, the CRC and both sizes are written as
        zero.  When either size exceeds ZIP64_LIMIT, a ZIP64 extra field
        carrying the real sizes is appended, the header sizes are set to
        0xffffffff, and extract_version/create_version are raised to at
        least 45 on this object.
        """
        dt = self.date_time
        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
        if self.flag_bits & 0x08:
            # Set these to zero because we write them after the file data
            CRC = compress_size = file_size = 0
        else:
            CRC = self.CRC
            compress_size = self.compress_size
            file_size = self.file_size

        extra = self.extra

        if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
            # File is larger than what fits into a 4 byte integer,
            # fall back to the ZIP64 extension
            fmt = '<HHQQ'
            extra = extra + struct.pack(fmt,
                    1, struct.calcsize(fmt)-4, file_size, compress_size)
            file_size = 0xffffffff # -1
            compress_size = 0xffffffff # -1
            self.extract_version = max(45, self.extract_version)
            # Bump the creator version from its own previous value rather
            # than overwriting it with the extract version.
            self.create_version = max(45, self.create_version)

        header = struct.pack(structFileHeader, stringFileHeader,
                 self.extract_version, self.reserved, self.flag_bits,
                 self.compress_type, dostime, dosdate, CRC,
                 compress_size, file_size,
                 len(self.filename), len(extra))
        return header + self.filename + extra

    def _decodeExtra(self):
        """Apply ZIP64 values found in self.extra to this entry.

        Walks the (type, length, payload) records of the extra field.  For
        a record of type 1 (ZIP64), 64-bit values replace file_size,
        compress_size and header_offset, in that order, for each attribute
        that currently holds a "see ZIP64" marker value.

        Raises RuntimeError when a type 1 record has an unexpected length.
        """
        # Try to decode the extra field.
        extra = self.extra
        unpack = struct.unpack
        # A record needs at least a 4 byte (type, length) header; a shorter
        # trailing fragment is ignored instead of crashing in unpack().
        while len(extra) >= 4:
            tp, ln = unpack('<HH', extra[:4])
            if tp == 1:
                if ln >= 24:
                    counts = unpack('<QQQ', extra[4:28])
                elif ln == 16:
                    counts = unpack('<QQ', extra[4:20])
                elif ln == 8:
                    counts = unpack('<Q', extra[4:12])
                elif ln == 0:
                    counts = ()
                else:
                    raise RuntimeError("Corrupt extra field %s"%(ln,))

                idx = 0

                # ZIP64 extension (large files and/or large archives)
                # XXX Is this correct? won't this exclude 2**32-1 byte files?
                if self.file_size in (0xffffffffffffffff, 0xffffffff):
                    self.file_size = counts[idx]
                    idx += 1

                if self.compress_size == -1 or self.compress_size == 0xFFFFFFFF:
                    self.compress_size = counts[idx]
                    idx += 1

                if self.header_offset == -1 or self.header_offset == 0xffffffff:
                    self.header_offset = counts[idx]
                    idx+=1

            extra = extra[ln+4:]
Tim Petersa608bb22006-06-15 18:06:29 +0000301
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000302
Martin v. Löwisc6d626e2007-02-13 09:49:38 +0000303class _ZipDecrypter:
304 """Class to handle decryption of files stored within a ZIP archive.
305
306 ZIP supports a password-based form of encryption. Even though known
307 plaintext attacks have been found against it, it is still useful
Gregory P. Smithda407232008-01-20 01:32:00 +0000308 to be able to get data out of such a file.
Martin v. Löwisc6d626e2007-02-13 09:49:38 +0000309
310 Usage:
311 zd = _ZipDecrypter(mypwd)
312 plain_char = zd(cypher_char)
313 plain_text = map(zd, cypher_text)
314 """
315
316 def _GenerateCRCTable():
317 """Generate a CRC-32 table.
318
319 ZIP encryption uses the CRC32 one-byte primitive for scrambling some
320 internal keys. We noticed that a direct implementation is faster than
321 relying on binascii.crc32().
322 """
323 poly = 0xedb88320
324 table = [0] * 256
325 for i in range(256):
326 crc = i
327 for j in range(8):
328 if crc & 1:
329 crc = ((crc >> 1) & 0x7FFFFFFF) ^ poly
330 else:
331 crc = ((crc >> 1) & 0x7FFFFFFF)
332 table[i] = crc
333 return table
334 crctable = _GenerateCRCTable()
335
336 def _crc32(self, ch, crc):
337 """Compute the CRC32 primitive on one byte."""
338 return ((crc >> 8) & 0xffffff) ^ self.crctable[(crc ^ ord(ch)) & 0xff]
339
340 def __init__(self, pwd):
341 self.key0 = 305419896
342 self.key1 = 591751049
343 self.key2 = 878082192
344 for p in pwd:
345 self._UpdateKeys(p)
346
347 def _UpdateKeys(self, c):
348 self.key0 = self._crc32(c, self.key0)
349 self.key1 = (self.key1 + (self.key0 & 255)) & 4294967295
350 self.key1 = (self.key1 * 134775813 + 1) & 4294967295
351 self.key2 = self._crc32(chr((self.key1 >> 24) & 255), self.key2)
352
353 def __call__(self, c):
354 """Decrypt a single character."""
355 c = ord(c)
356 k = self.key2 | 2
357 c = c ^ (((k * (k^1)) >> 8) & 255)
358 c = chr(c)
359 self._UpdateKeys(c)
360 return c
361
class ZipExtFile:
    """File-like object for reading an archive member.
       Is returned by ZipFile.open().
    """

    def __init__(self, fileobj, zipinfo, decrypt=None):
        """Wrap fileobj, positioned at the member's data, for reading.

        zipinfo supplies compress_type, compress_size and filename.
        decrypt, when given, is a callable applied to every character read
        from fileobj before decompression.
        """
        self.fileobj = fileobj
        self.decrypter = decrypt
        self.bytes_read = 0
        self.rawbuffer = ''             # read from file, not yet processed
        self.readbuffer = ''            # decoded data not yet returned
        self.linebuffer = ''            # data held back by readline()
        self.eof = False
        self.univ_newlines = False
        self.nlSeps = ("\n", )
        self.lastdiscard = ''           # separator removed by last readline()

        self.compress_type = zipinfo.compress_type
        self.compress_size = zipinfo.compress_size

        self.closed = False
        self.mode = "r"
        self.name = zipinfo.filename

        # read from compressed files in 64k blocks
        self.compreadsize = 64*1024
        if self.compress_type == ZIP_DEFLATED:
            self.dc = zlib.decompressobj(-15)

    def set_univ_newlines(self, univ_newlines):
        """Enable or disable universal newline handling in readline()."""
        self.univ_newlines = univ_newlines

        # pick line separator char(s) based on universal newlines flag
        self.nlSeps = ("\n", )
        if self.univ_newlines:
            self.nlSeps = ("\r\n", "\r", "\n")

    def __iter__(self):
        return self

    def next(self):
        """Return the next line; raise StopIteration at end of data."""
        nextline = self.readline()
        if not nextline:
            raise StopIteration()

        return nextline

    def close(self):
        """Mark the object as closed (the underlying file is not touched)."""
        self.closed = True

    def _checkfornewline(self):
        """Locate the first line separator in the line buffer.

        Returns (index, separator_length), or (-1, -1) when the buffer
        holds no separator.
        """
        nl, nllen = -1, -1
        if self.linebuffer:
            # ugly check for cases where half of an \r\n pair was
            # read on the last pass, and the \r was discarded.  In this
            # case we just throw away the \n at the start of the buffer.
            if (self.lastdiscard, self.linebuffer[0]) == ('\r','\n'):
                self.linebuffer = self.linebuffer[1:]
            # Only the character directly after the discarded \r can be
            # its partner; forget it so a later, genuine \n is kept.
            self.lastdiscard = ''

            # Use the separator that occurs first in the buffer.  On a tie
            # the earlier entry of nlSeps wins, so "\r\n" beats "\r".
            for sep in self.nlSeps:
                pos = self.linebuffer.find(sep)
                if pos >= 0 and (nl < 0 or pos < nl):
                    nl, nllen = pos, len(sep)

        return nl, nllen

    def readline(self, size = -1):
        """Read a line with approx. size. If size is negative,
           read a whole line.

        A complete line is returned with "\n" as its terminator whatever
        separator was found.  If the line is longer than size, the first
        size characters are returned without a terminator and the rest of
        the line stays buffered for the next call.
        """
        if size == 0:
            return ''
        limited = size > 0

        # check for a newline already in buffer
        nl, nllen = self._checkfornewline()

        if nl < 0:
            # no line break in buffer - try to read more
            remaining = size - len(self.linebuffer)
            while nl < 0 and (not limited or remaining > 0):
                if limited:
                    buf = self.read(min(remaining, 100))
                else:
                    buf = self.read(100)
                if not buf:
                    break
                self.linebuffer += buf
                remaining -= len(buf)

                # check for a newline in buffer
                nl, nllen = self._checkfornewline()

            # we either ran out of bytes in the file, or
            # met the specified size limit without finding a newline,
            # so return current buffer
            if nl < 0:
                s = self.linebuffer
                self.linebuffer = ''
                return s

        if limited and nl > size:
            # The separator lies beyond the limit: hand out a partial line
            # and keep everything else, so no data is dropped.
            s = self.linebuffer[:size]
            self.linebuffer = self.linebuffer[size:]
            return s

        buf = self.linebuffer[:nl]
        self.lastdiscard = self.linebuffer[nl:nl + nllen]
        self.linebuffer = self.linebuffer[nl + nllen:]

        # line is always returned with \n as newline char (except possibly
        # for a final incomplete line in the file, which is handled above).
        return buf + "\n"

    def readlines(self, sizehint = -1):
        """Return a list with all (following) lines. The sizehint parameter
        is ignored in this implementation.
        """
        result = []
        while True:
            line = self.readline()
            if not line: break
            result.append(line)
        return result

    def read(self, size = None):
        """Return up to size characters of member data.

        With size None or negative, everything that becomes available in
        this call is returned; with size 0 an empty string is returned.
        """
        # act like file() obj and return empty string if size is 0
        if size == 0:
            return ''

        # determine read size
        bytesToRead = self.compress_size - self.bytes_read

        # adjust read size for encrypted files since the first 12 bytes
        # are for the encryption/password information
        if self.decrypter is not None:
            bytesToRead -= 12

        if size is not None and size >= 0:
            if self.compress_type == ZIP_STORED:
                lr = len(self.readbuffer)
                bytesToRead = min(bytesToRead, size - lr)
            elif self.compress_type == ZIP_DEFLATED:
                if len(self.readbuffer) > size:
                    # the user has requested fewer bytes than we've already
                    # pulled through the decompressor; don't read any more
                    bytesToRead = 0
                else:
                    # user will use up the buffer, so read some more
                    lr = len(self.rawbuffer)
                    bytesToRead = min(bytesToRead, self.compreadsize - lr)

        # avoid reading past end of file contents
        if bytesToRead + self.bytes_read > self.compress_size:
            bytesToRead = self.compress_size - self.bytes_read

        # try to read from file (if necessary)
        if bytesToRead > 0:
            chunk = self.fileobj.read(bytesToRead)
            self.bytes_read += len(chunk)
            self.rawbuffer += chunk

        # handle contents of raw buffer
        if self.rawbuffer:
            newdata = self.rawbuffer
            self.rawbuffer = ''

            # decrypt new data if we were given an object to handle that
            if newdata and self.decrypter is not None:
                newdata = ''.join(map(self.decrypter, newdata))

            # decompress newly read data if necessary
            if newdata and self.compress_type == ZIP_DEFLATED:
                newdata = self.dc.decompress(newdata)
                self.rawbuffer = self.dc.unconsumed_tail
                # NOTE(review): nothing in this class sets self.eof to
                # True, so this flush only runs if a caller sets it.
                if self.eof and len(self.rawbuffer) == 0:
                    # we're out of raw bytes (both from the file and
                    # the local buffer); flush just to make sure the
                    # decompressor is done
                    newdata += self.dc.flush()
                    # prevent decompressor from being used again
                    self.dc = None

            self.readbuffer += newdata

        # return what the user asked for; a negative size means "no limit"
        # and must not be used as a slice index.
        if size is None or size < 0 or len(self.readbuffer) <= size:
            data = self.readbuffer
            self.readbuffer = ''
        else:
            data = self.readbuffer[:size]
            self.readbuffer = self.readbuffer[size:]

        return data
Tim Petersea5962f2007-03-12 18:07:52 +0000554
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000555
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000556class ZipFile:
Tim Petersa19a1682001-03-29 04:36:09 +0000557 """ Class with methods to open, read, write, close, list zip files.
558
    z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=False)
Tim Petersa19a1682001-03-29 04:36:09 +0000560
Fred Drake3d9091e2001-03-26 15:49:24 +0000561 file: Either the path to the file, or a file-like object.
562 If it is a path, the file will be opened and closed by ZipFile.
563 mode: The mode can be either read "r", write "w" or append "a".
564 compression: ZIP_STORED (no compression) or ZIP_DEFLATED (requires zlib).
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000565 allowZip64: if True ZipFile will create files with ZIP64 extensions when
566 needed, otherwise it will raise an exception when this would
567 be necessary.
568
Fred Drake3d9091e2001-03-26 15:49:24 +0000569 """
Fred Drake484d7352000-10-02 21:14:52 +0000570
Fred Drake90eac282001-02-28 05:29:34 +0000571 fp = None # Set here since __del__ checks it
572
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000573 def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=False):
Fred Drake484d7352000-10-02 21:14:52 +0000574 """Open the ZIP file with mode read "r", write "w" or append "a"."""
Georg Brandl4b3ab6f2007-07-12 09:59:22 +0000575 if mode not in ("r", "w", "a"):
576 raise RuntimeError('ZipFile() requires mode "r", "w", or "a"')
577
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000578 if compression == ZIP_STORED:
579 pass
580 elif compression == ZIP_DEFLATED:
581 if not zlib:
582 raise RuntimeError,\
Fred Drake5db246d2000-09-29 20:44:48 +0000583 "Compression requires the (missing) zlib module"
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000584 else:
585 raise RuntimeError, "That compression method is not supported"
Georg Brandl4b3ab6f2007-07-12 09:59:22 +0000586
587 self._allowZip64 = allowZip64
588 self._didModify = False
Tim Peterse1190062001-01-15 03:34:38 +0000589 self.debug = 0 # Level of printing: 0 through 3
590 self.NameToInfo = {} # Find file info given name
591 self.filelist = [] # List of ZipInfo instances for archive
592 self.compression = compression # Method of compression
Raymond Hettinger2ca7c192005-02-16 09:27:49 +0000593 self.mode = key = mode.replace('b', '')[0]
Martin v. Löwisc6d626e2007-02-13 09:49:38 +0000594 self.pwd = None
Tim Petersa19a1682001-03-29 04:36:09 +0000595
Fred Drake3d9091e2001-03-26 15:49:24 +0000596 # Check if we were passed a file-like object
Walter Dörwald65230a22002-06-03 15:58:32 +0000597 if isinstance(file, basestring):
Fred Drake3d9091e2001-03-26 15:49:24 +0000598 self._filePassed = 0
599 self.filename = file
600 modeDict = {'r' : 'rb', 'w': 'wb', 'a' : 'r+b'}
Martin v. Löwis84f6de92007-02-13 10:10:39 +0000601 try:
602 self.fp = open(file, modeDict[mode])
603 except IOError:
604 if mode == 'a':
605 mode = key = 'w'
606 self.fp = open(file, modeDict[mode])
607 else:
608 raise
Fred Drake3d9091e2001-03-26 15:49:24 +0000609 else:
610 self._filePassed = 1
611 self.fp = file
612 self.filename = getattr(file, 'name', None)
Tim Petersa19a1682001-03-29 04:36:09 +0000613
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000614 if key == 'r':
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000615 self._GetContents()
616 elif key == 'w':
Fred Drake3d9091e2001-03-26 15:49:24 +0000617 pass
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000618 elif key == 'a':
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000619 try: # See if file is a zip file
620 self._RealGetContents()
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000621 # seek to start of directory and overwrite
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000622 self.fp.seek(self.start_dir, 0)
623 except BadZipfile: # file is not a zip file, just append
624 self.fp.seek(0, 2)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000625 else:
Tim Peters7d3bad62001-04-04 18:56:49 +0000626 if not self._filePassed:
627 self.fp.close()
628 self.fp = None
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000629 raise RuntimeError, 'Mode must be "r", "w" or "a"'
630
631 def _GetContents(self):
Tim Peters7d3bad62001-04-04 18:56:49 +0000632 """Read the directory, making sure we close the file if the format
633 is bad."""
634 try:
635 self._RealGetContents()
636 except BadZipfile:
637 if not self._filePassed:
638 self.fp.close()
639 self.fp = None
640 raise
641
642 def _RealGetContents(self):
Fred Drake484d7352000-10-02 21:14:52 +0000643 """Read in the table of contents for the ZIP file."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000644 fp = self.fp
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000645 endrec = _EndRecData(fp)
646 if not endrec:
647 raise BadZipfile, "File is not a zip file"
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000648 if self.debug > 1:
649 print endrec
Tim Peterse1190062001-01-15 03:34:38 +0000650 size_cd = endrec[5] # bytes in central directory
651 offset_cd = endrec[6] # offset of central directory
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000652 self.comment = endrec[8] # archive comment
653 # endrec[9] is the offset of the "End of Central Dir" record
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000654 if endrec[9] > ZIP64_LIMIT:
655 x = endrec[9] - size_cd - 56 - 20
656 else:
657 x = endrec[9] - size_cd
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000658 # "concat" is zero, unless zip was concatenated to another file
659 concat = x - offset_cd
660 if self.debug > 2:
661 print "given, inferred, offset", offset_cd, x, concat
662 # self.start_dir: Position of start of central directory
663 self.start_dir = offset_cd + concat
664 fp.seek(self.start_dir, 0)
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000665 data = fp.read(size_cd)
666 fp = cStringIO.StringIO(data)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000667 total = 0
668 while total < size_cd:
669 centdir = fp.read(46)
670 total = total + 46
671 if centdir[0:4] != stringCentralDir:
672 raise BadZipfile, "Bad magic number for central directory"
673 centdir = struct.unpack(structCentralDir, centdir)
674 if self.debug > 2:
675 print centdir
Fred Drake3e038e52001-02-28 17:56:26 +0000676 filename = fp.read(centdir[_CD_FILENAME_LENGTH])
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000677 # Create ZipInfo instance to store file information
678 x = ZipInfo(filename)
Fred Drake3e038e52001-02-28 17:56:26 +0000679 x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
680 x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
681 total = (total + centdir[_CD_FILENAME_LENGTH]
682 + centdir[_CD_EXTRA_FIELD_LENGTH]
683 + centdir[_CD_COMMENT_LENGTH])
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000684 x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000685 (x.create_version, x.create_system, x.extract_version, x.reserved,
686 x.flag_bits, x.compress_type, t, d,
687 x.CRC, x.compress_size, x.file_size) = centdir[1:12]
688 x.volume, x.internal_attr, x.external_attr = centdir[15:18]
689 # Convert date/time code to (year, month, day, hour, min, sec)
Gregory P. Smith0c63fc22008-01-20 01:21:03 +0000690 x._raw_time = t
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000691 x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
Fred Drake414ca662000-06-13 18:49:53 +0000692 t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000693
694 x._decodeExtra()
695 x.header_offset = x.header_offset + concat
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000696 self.filelist.append(x)
697 self.NameToInfo[x.filename] = x
698 if self.debug > 2:
699 print "total", total
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000700
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000701
702 def namelist(self):
Fred Drake484d7352000-10-02 21:14:52 +0000703 """Return a list of file names in the archive."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000704 l = []
705 for data in self.filelist:
706 l.append(data.filename)
707 return l
708
709 def infolist(self):
Fred Drake484d7352000-10-02 21:14:52 +0000710 """Return a list of class ZipInfo instances for files in the
711 archive."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000712 return self.filelist
713
714 def printdir(self):
Fred Drake484d7352000-10-02 21:14:52 +0000715 """Print a table of contents for the zip file."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000716 print "%-46s %19s %12s" % ("File Name", "Modified ", "Size")
717 for zinfo in self.filelist:
Raymond Hettinger351e1a32008-01-14 22:58:05 +0000718 date = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time[:6]
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000719 print "%-46s %s %12d" % (zinfo.filename, date, zinfo.file_size)
720
721 def testzip(self):
Fred Drake484d7352000-10-02 21:14:52 +0000722 """Read all the files and check the CRC."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000723 for zinfo in self.filelist:
724 try:
Tim Peterse1190062001-01-15 03:34:38 +0000725 self.read(zinfo.filename) # Check CRC-32
Raymond Hettingerc0fac962003-06-27 22:25:03 +0000726 except BadZipfile:
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000727 return zinfo.filename
728
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000729
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000730 def getinfo(self, name):
Fred Drake484d7352000-10-02 21:14:52 +0000731 """Return the instance of ZipInfo given 'name'."""
Georg Brandl4b3ab6f2007-07-12 09:59:22 +0000732 info = self.NameToInfo.get(name)
733 if info is None:
734 raise KeyError(
735 'There is no item named %r in the archive' % name)
736
737 return info
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000738
Martin v. Löwisc6d626e2007-02-13 09:49:38 +0000739 def setpassword(self, pwd):
740 """Set default password for encrypted files."""
741 self.pwd = pwd
742
743 def read(self, name, pwd=None):
Fred Drake484d7352000-10-02 21:14:52 +0000744 """Return file bytes (as a string) for name."""
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000745 return self.open(name, "r", pwd).read()
746
747 def open(self, name, mode="r", pwd=None):
748 """Return file-like object for 'name'."""
749 if mode not in ("r", "U", "rU"):
750 raise RuntimeError, 'open() requires mode "r", "U", or "rU"'
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000751 if not self.fp:
752 raise RuntimeError, \
Fred Drake5db246d2000-09-29 20:44:48 +0000753 "Attempt to read ZIP archive that was already closed"
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000754
Tim Petersea5962f2007-03-12 18:07:52 +0000755 # Only open a new file for instances where we were not
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000756 # given a file object in the constructor
757 if self._filePassed:
758 zef_file = self.fp
759 else:
760 zef_file = open(self.filename, 'rb')
761
762 # Get info object for name
763 zinfo = self.getinfo(name)
764
765 filepos = zef_file.tell()
766
767 zef_file.seek(zinfo.header_offset, 0)
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000768
769 # Skip the file header:
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000770 fheader = zef_file.read(30)
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000771 if fheader[0:4] != stringFileHeader:
772 raise BadZipfile, "Bad magic number for file header"
773
774 fheader = struct.unpack(structFileHeader, fheader)
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000775 fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000776 if fheader[_FH_EXTRA_FIELD_LENGTH]:
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000777 zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000778
779 if fname != zinfo.orig_filename:
780 raise BadZipfile, \
781 'File name in directory "%s" and header "%s" differ.' % (
782 zinfo.orig_filename, fname)
783
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000784 # check for encrypted flag & handle password
785 is_encrypted = zinfo.flag_bits & 0x1
786 zd = None
Martin v. Löwisc6d626e2007-02-13 09:49:38 +0000787 if is_encrypted:
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000788 if not pwd:
789 pwd = self.pwd
790 if not pwd:
791 raise RuntimeError, "File %s is encrypted, " \
792 "password required for extraction" % name
793
Martin v. Löwisc6d626e2007-02-13 09:49:38 +0000794 zd = _ZipDecrypter(pwd)
795 # The first 12 bytes in the cypher stream is an encryption header
796 # used to strengthen the algorithm. The first 11 bytes are
797 # completely random, while the 12th contains the MSB of the CRC,
Gregory P. Smith0c63fc22008-01-20 01:21:03 +0000798 # or the MSB of the file time depending on the header type
Martin v. Löwisc6d626e2007-02-13 09:49:38 +0000799 # and is used to check the correctness of the password.
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000800 bytes = zef_file.read(12)
Martin v. Löwisc6d626e2007-02-13 09:49:38 +0000801 h = map(zd, bytes[0:12])
Gregory P. Smith0c63fc22008-01-20 01:21:03 +0000802 if zinfo.flag_bits & 0x8:
803 # compare against the file type from extended local headers
804 check_byte = (zinfo._raw_time >> 8) & 0xff
805 else:
806 # compare against the CRC otherwise
807 check_byte = (zinfo.CRC >> 24) & 0xff
808 if ord(h[11]) != check_byte:
809 raise RuntimeError("Bad password for file", name)
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000810
811 # build and return a ZipExtFile
812 if zd is None:
813 zef = ZipExtFile(zef_file, zinfo)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000814 else:
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000815 zef = ZipExtFile(zef_file, zinfo, zd)
816
817 # set universal newlines on ZipExtFile if necessary
818 if "U" in mode:
819 zef.set_univ_newlines(True)
820 return zef
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000821
Georg Brandl62416bc2008-01-07 18:47:44 +0000822 def extract(self, member, path=None, pwd=None):
823 """Extract a member from the archive to the current working directory,
824 using its full name. Its file information is extracted as accurately
825 as possible. `member' may be a filename or a ZipInfo object. You can
826 specify a different directory using `path'.
827 """
828 if not isinstance(member, ZipInfo):
829 member = self.getinfo(member)
830
831 if path is None:
832 path = os.getcwd()
833
834 return self._extract_member(member, path, pwd)
835
836 def extractall(self, path=None, members=None, pwd=None):
837 """Extract all members from the archive to the current working
838 directory. `path' specifies a different directory to extract to.
839 `members' is optional and must be a subset of the list returned
840 by namelist().
841 """
842 if members is None:
843 members = self.namelist()
844
845 for zipinfo in members:
846 self.extract(zipinfo, path, pwd)
847
848 def _extract_member(self, member, targetpath, pwd):
849 """Extract the ZipInfo object 'member' to a physical
850 file on the path targetpath.
851 """
852 # build the destination pathname, replacing
853 # forward slashes to platform specific separators.
854 if targetpath[-1:] == "/":
855 targetpath = targetpath[:-1]
856
857 # don't include leading "/" from file name if present
858 if os.path.isabs(member.filename):
859 targetpath = os.path.join(targetpath, member.filename[1:])
860 else:
861 targetpath = os.path.join(targetpath, member.filename)
862
863 targetpath = os.path.normpath(targetpath)
864
865 # Create all upper directories if necessary.
866 upperdirs = os.path.dirname(targetpath)
867 if upperdirs and not os.path.exists(upperdirs):
868 os.makedirs(upperdirs)
869
870 source = self.open(member.filename, pwd=pwd)
871 target = file(targetpath, "wb")
872 shutil.copyfileobj(source, target)
873 source.close()
874 target.close()
875
876 return targetpath
877
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000878 def _writecheck(self, zinfo):
Fred Drake484d7352000-10-02 21:14:52 +0000879 """Check for errors before writing a file to the archive."""
Raymond Hettinger54f02222002-06-01 14:18:47 +0000880 if zinfo.filename in self.NameToInfo:
Tim Peterse1190062001-01-15 03:34:38 +0000881 if self.debug: # Warning for duplicate names
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000882 print "Duplicate name:", zinfo.filename
883 if self.mode not in ("w", "a"):
884 raise RuntimeError, 'write() requires mode "w" or "a"'
885 if not self.fp:
886 raise RuntimeError, \
Fred Drake5db246d2000-09-29 20:44:48 +0000887 "Attempt to write ZIP archive that was already closed"
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000888 if zinfo.compress_type == ZIP_DEFLATED and not zlib:
889 raise RuntimeError, \
Fred Drake5db246d2000-09-29 20:44:48 +0000890 "Compression requires the (missing) zlib module"
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000891 if zinfo.compress_type not in (ZIP_STORED, ZIP_DEFLATED):
892 raise RuntimeError, \
Fred Drake5db246d2000-09-29 20:44:48 +0000893 "That compression method is not supported"
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000894 if zinfo.file_size > ZIP64_LIMIT:
895 if not self._allowZip64:
896 raise LargeZipFile("Filesize would require ZIP64 extensions")
897 if zinfo.header_offset > ZIP64_LIMIT:
898 if not self._allowZip64:
899 raise LargeZipFile("Zipfile size would require ZIP64 extensions")
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000900
901 def write(self, filename, arcname=None, compress_type=None):
Fred Drake484d7352000-10-02 21:14:52 +0000902 """Put the bytes from filename into the archive under the name
903 arcname."""
Georg Brandl4b3ab6f2007-07-12 09:59:22 +0000904 if not self.fp:
905 raise RuntimeError(
906 "Attempt to write to ZIP archive that was already closed")
907
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000908 st = os.stat(filename)
Raymond Hettinger32200ae2002-06-01 19:51:15 +0000909 mtime = time.localtime(st.st_mtime)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000910 date_time = mtime[0:6]
911 # Create ZipInfo instance to store file information
912 if arcname is None:
Georg Brandl8f7c54e2006-02-20 08:40:38 +0000913 arcname = filename
914 arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
915 while arcname[0] in (os.sep, os.altsep):
916 arcname = arcname[1:]
917 zinfo = ZipInfo(arcname, date_time)
Andrew M. Kuchling55430212004-07-10 15:40:29 +0000918 zinfo.external_attr = (st[0] & 0xFFFF) << 16L # Unix attributes
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000919 if compress_type is None:
Tim Peterse1190062001-01-15 03:34:38 +0000920 zinfo.compress_type = self.compression
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000921 else:
Tim Peterse1190062001-01-15 03:34:38 +0000922 zinfo.compress_type = compress_type
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000923
924 zinfo.file_size = st.st_size
Finn Bock03a3bb82001-09-05 18:40:33 +0000925 zinfo.flag_bits = 0x00
Tim Peterse1190062001-01-15 03:34:38 +0000926 zinfo.header_offset = self.fp.tell() # Start of header bytes
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000927
928 self._writecheck(zinfo)
929 self._didModify = True
930 fp = open(filename, "rb")
Finn Bock03a3bb82001-09-05 18:40:33 +0000931 # Must overwrite CRC and sizes with correct data later
932 zinfo.CRC = CRC = 0
933 zinfo.compress_size = compress_size = 0
934 zinfo.file_size = file_size = 0
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000935 self.fp.write(zinfo.FileHeader())
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000936 if zinfo.compress_type == ZIP_DEFLATED:
937 cmpr = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
938 zlib.DEFLATED, -15)
939 else:
940 cmpr = None
941 while 1:
942 buf = fp.read(1024 * 8)
943 if not buf:
944 break
945 file_size = file_size + len(buf)
Gregory P. Smithbf02e3b2008-03-19 03:14:41 +0000946 CRC = crc32(buf, CRC) & 0xffffffff
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000947 if cmpr:
948 buf = cmpr.compress(buf)
949 compress_size = compress_size + len(buf)
950 self.fp.write(buf)
951 fp.close()
952 if cmpr:
953 buf = cmpr.flush()
954 compress_size = compress_size + len(buf)
955 self.fp.write(buf)
956 zinfo.compress_size = compress_size
957 else:
958 zinfo.compress_size = file_size
959 zinfo.CRC = CRC
960 zinfo.file_size = file_size
Finn Bock03a3bb82001-09-05 18:40:33 +0000961 # Seek backwards and write CRC and file sizes
Tim Petersb64bec32001-09-18 02:26:39 +0000962 position = self.fp.tell() # Preserve current position in file
Finn Bock03a3bb82001-09-05 18:40:33 +0000963 self.fp.seek(zinfo.header_offset + 14, 0)
Gregory P. Smithbf02e3b2008-03-19 03:14:41 +0000964 self.fp.write(struct.pack("<LLL", zinfo.CRC, zinfo.compress_size,
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000965 zinfo.file_size))
Finn Bock03a3bb82001-09-05 18:40:33 +0000966 self.fp.seek(position, 0)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000967 self.filelist.append(zinfo)
968 self.NameToInfo[zinfo.filename] = zinfo
969
Just van Rossumb083cb32002-12-12 12:23:32 +0000970 def writestr(self, zinfo_or_arcname, bytes):
Fred Drake484d7352000-10-02 21:14:52 +0000971 """Write a file into the archive. The contents is the string
Just van Rossumb083cb32002-12-12 12:23:32 +0000972 'bytes'. 'zinfo_or_arcname' is either a ZipInfo instance or
973 the name of the file in the archive."""
974 if not isinstance(zinfo_or_arcname, ZipInfo):
975 zinfo = ZipInfo(filename=zinfo_or_arcname,
Raymond Hettinger351e1a32008-01-14 22:58:05 +0000976 date_time=time.localtime(time.time())[:6])
Just van Rossumb083cb32002-12-12 12:23:32 +0000977 zinfo.compress_type = self.compression
978 else:
979 zinfo = zinfo_or_arcname
Georg Brandl4b3ab6f2007-07-12 09:59:22 +0000980
981 if not self.fp:
982 raise RuntimeError(
983 "Attempt to write to ZIP archive that was already closed")
984
Tim Peterse1190062001-01-15 03:34:38 +0000985 zinfo.file_size = len(bytes) # Uncompressed size
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000986 zinfo.header_offset = self.fp.tell() # Start of header bytes
987 self._writecheck(zinfo)
988 self._didModify = True
Gregory P. Smithbf02e3b2008-03-19 03:14:41 +0000989 zinfo.CRC = crc32(bytes) & 0xffffffff # CRC-32 checksum
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000990 if zinfo.compress_type == ZIP_DEFLATED:
991 co = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
992 zlib.DEFLATED, -15)
993 bytes = co.compress(bytes) + co.flush()
Tim Peterse1190062001-01-15 03:34:38 +0000994 zinfo.compress_size = len(bytes) # Compressed size
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000995 else:
996 zinfo.compress_size = zinfo.file_size
Tim Peterse1190062001-01-15 03:34:38 +0000997 zinfo.header_offset = self.fp.tell() # Start of header bytes
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000998 self.fp.write(zinfo.FileHeader())
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000999 self.fp.write(bytes)
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001000 self.fp.flush()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001001 if zinfo.flag_bits & 0x08:
Tim Peterse1190062001-01-15 03:34:38 +00001002 # Write CRC and file sizes after the file data
Brett Cannonff450f72004-07-10 19:09:20 +00001003 self.fp.write(struct.pack("<lLL", zinfo.CRC, zinfo.compress_size,
Tim Peterse1190062001-01-15 03:34:38 +00001004 zinfo.file_size))
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001005 self.filelist.append(zinfo)
1006 self.NameToInfo[zinfo.filename] = zinfo
1007
1008 def __del__(self):
Fred Drake484d7352000-10-02 21:14:52 +00001009 """Call the "close()" method in case the user forgot."""
Tim Petersd15f8bb2001-11-28 23:16:40 +00001010 self.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001011
1012 def close(self):
Fred Drake484d7352000-10-02 21:14:52 +00001013 """Close the file, and for mode "w" and "a" write the ending
1014 records."""
Tim Petersd15f8bb2001-11-28 23:16:40 +00001015 if self.fp is None:
1016 return
Tim Petersa608bb22006-06-15 18:06:29 +00001017
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001018 if self.mode in ("w", "a") and self._didModify: # write ending records
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001019 count = 0
1020 pos1 = self.fp.tell()
Tim Peterse1190062001-01-15 03:34:38 +00001021 for zinfo in self.filelist: # write central directory
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001022 count = count + 1
1023 dt = zinfo.date_time
1024 dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
Tim Peters3caca232001-12-06 06:23:26 +00001025 dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001026 extra = []
1027 if zinfo.file_size > ZIP64_LIMIT \
1028 or zinfo.compress_size > ZIP64_LIMIT:
1029 extra.append(zinfo.file_size)
1030 extra.append(zinfo.compress_size)
1031 file_size = 0xffffffff #-1
1032 compress_size = 0xffffffff #-1
1033 else:
1034 file_size = zinfo.file_size
1035 compress_size = zinfo.compress_size
1036
1037 if zinfo.header_offset > ZIP64_LIMIT:
1038 extra.append(zinfo.header_offset)
Gregory P. Smithbf02e3b2008-03-19 03:14:41 +00001039 header_offset = 0xffffffffL # -1 32 bit
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001040 else:
1041 header_offset = zinfo.header_offset
1042
1043 extra_data = zinfo.extra
1044 if extra:
1045 # Append a ZIP64 field to the extra's
1046 extra_data = struct.pack(
Gregory P. Smithb89a0962008-03-19 01:46:10 +00001047 '<HH' + 'Q'*len(extra),
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001048 1, 8*len(extra), *extra) + extra_data
Tim Petersa608bb22006-06-15 18:06:29 +00001049
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001050 extract_version = max(45, zinfo.extract_version)
1051 create_version = max(45, zinfo.create_version)
1052 else:
1053 extract_version = zinfo.extract_version
1054 create_version = zinfo.create_version
1055
Gregory P. Smithbf02e3b2008-03-19 03:14:41 +00001056 try:
1057 centdir = struct.pack(structCentralDir,
1058 stringCentralDir, create_version,
1059 zinfo.create_system, extract_version, zinfo.reserved,
1060 zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
1061 zinfo.CRC, compress_size, file_size,
1062 len(zinfo.filename), len(extra_data), len(zinfo.comment),
1063 0, zinfo.internal_attr, zinfo.external_attr,
1064 header_offset)
1065 except DeprecationWarning:
1066 print >>sys.stderr, (structCentralDir,
1067 stringCentralDir, create_version,
1068 zinfo.create_system, extract_version, zinfo.reserved,
1069 zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
1070 zinfo.CRC, compress_size, file_size,
1071 len(zinfo.filename), len(extra_data), len(zinfo.comment),
1072 0, zinfo.internal_attr, zinfo.external_attr,
1073 header_offset)
1074 raise
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001075 self.fp.write(centdir)
1076 self.fp.write(zinfo.filename)
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001077 self.fp.write(extra_data)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001078 self.fp.write(zinfo.comment)
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001079
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001080 pos2 = self.fp.tell()
1081 # Write end-of-zip-archive record
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001082 if pos1 > ZIP64_LIMIT:
1083 # Need to write the ZIP64 end-of-archive records
1084 zip64endrec = struct.pack(
1085 structEndArchive64, stringEndArchive64,
1086 44, 45, 45, 0, 0, count, count, pos2 - pos1, pos1)
1087 self.fp.write(zip64endrec)
1088
1089 zip64locrec = struct.pack(
Tim Petersa608bb22006-06-15 18:06:29 +00001090 structEndArchive64Locator,
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001091 stringEndArchive64Locator, 0, pos2, 1)
1092 self.fp.write(zip64locrec)
1093
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001094 endrec = struct.pack(structEndArchive, stringEndArchive,
Gregory P. Smithbf02e3b2008-03-19 03:14:41 +00001095 0, 0, count, count, pos2 - pos1, 0xffffffffL, 0)
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001096 self.fp.write(endrec)
1097
1098 else:
1099 endrec = struct.pack(structEndArchive, stringEndArchive,
1100 0, 0, count, count, pos2 - pos1, pos1, 0)
1101 self.fp.write(endrec)
Guido van Rossumf85af612001-04-14 16:45:14 +00001102 self.fp.flush()
Fred Drake3d9091e2001-03-26 15:49:24 +00001103 if not self._filePassed:
1104 self.fp.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001105 self.fp = None
1106
1107
class PyZipFile(ZipFile):
    """ZipFile subclass for building archives of compiled Python modules
    and packages."""

    def writepy(self, pathname, basename=""):
        """Add the Python code found at "pathname" to the ZIP archive.

        A package directory (one that contains __init__.py) is added
        together with all of its *.py files and, recursively, all of its
        sub-packages.  A plain directory contributes its *.py files only.
        Anything else must be the name of a *.py file, otherwise
        RuntimeError is raised.  What is stored is always the module.pyo
        or module.pyc file chosen by _get_codename(), which compiles the
        source first if necessary.  "basename" is the archive directory
        the entries are placed under.
        """
        if not os.path.isdir(pathname):
            # A single module file.
            if pathname[-3:] != ".py":
                raise RuntimeError(
                    'Files added with writepy() must end with ".py"')
            fname, arcname = self._get_codename(pathname[0:-3], basename)
            if self.debug:
                print("Adding file %s" % (arcname,))
            self.write(fname, arcname)
            return

        initname = os.path.join(pathname, "__init__.py")
        if not os.path.isfile(initname):
            # This is NOT a package directory, add its files at top level
            if self.debug:
                print("Adding files from directory %s" % (pathname,))
            for filename in os.listdir(pathname):
                if os.path.splitext(filename)[1] != ".py":
                    continue
                path = os.path.join(pathname, filename)
                fname, arcname = self._get_codename(path[0:-3], basename)
                if self.debug:
                    print("Adding %s" % (arcname,))
                self.write(fname, arcname)
            return

        # This is a package directory, add it
        name = os.path.split(pathname)[1]
        if basename:
            basename = "%s/%s" % (basename, name)
        else:
            basename = name
        if self.debug:
            print("Adding package in %s as %s" % (pathname, basename))
        fname, arcname = self._get_codename(initname[0:-3], basename)
        if self.debug:
            print("Adding %s" % (arcname,))
        self.write(fname, arcname)

        # Add all *.py files and package subdirectories
        dirlist = os.listdir(pathname)
        dirlist.remove("__init__.py")
        for filename in dirlist:
            path = os.path.join(pathname, filename)
            if os.path.isdir(path):
                if os.path.isfile(os.path.join(path, "__init__.py")):
                    # This is a package directory, add it
                    self.writepy(path, basename)  # Recursive call
                continue
            if os.path.splitext(filename)[1] == ".py":
                fname, arcname = self._get_codename(path[0:-3], basename)
                if self.debug:
                    print("Adding %s" % (arcname,))
                self.write(fname, arcname)

    def _get_codename(self, pathname, basename):
        """Return (filename, archivename) for the module path "pathname".

        "pathname" is the module path without extension.  An up-to-date
        .pyo file is preferred; otherwise the .pyc file is used, after
        compiling it when it is missing or older than the source.  A
        compile error is printed and not raised.  The archive name is the
        file's base name, prefixed with "basename/" when given.  For
        example, /python/lib/string gives
        (/python/lib/string.pyc, string.pyc).
        """
        source = pathname + ".py"
        compiled = pathname + ".pyc"
        optimized = pathname + ".pyo"

        def mtime(path):
            return os.stat(path).st_mtime

        if os.path.isfile(optimized) and mtime(optimized) >= mtime(source):
            fname = optimized       # Use .pyo file
        else:
            fname = compiled
            if not os.path.isfile(compiled) or \
                    mtime(compiled) < mtime(source):
                import py_compile
                if self.debug:
                    print("Compiling %s" % (source,))
                try:
                    py_compile.compile(source, compiled, None, True)
                except py_compile.PyCompileError:
                    err = sys.exc_info()[1]
                    print(err.msg)

        archivename = os.path.basename(fname)
        if basename:
            archivename = "%s/%s" % (basename, archivename)
        return (fname, archivename)
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001205
1206
def main(args=None):
    """Command line interface: list, test, extract or create a zipfile.

    'args' is the argument list without the program name and defaults to
    sys.argv[1:].  The first argument selects the action (-l, -t, -e or
    -c).  On a missing or unknown action, or a wrong number of arguments,
    the usage text is printed and the process exits with status 1.
    """
    import textwrap
    USAGE = textwrap.dedent("""\
        Usage:
            zipfile.py -l zipfile.zip # Show listing of a zipfile
            zipfile.py -t zipfile.zip # Test if a zipfile is valid
            zipfile.py -e zipfile.zip target # Extract zipfile into target dir
            zipfile.py -c zipfile.zip src ... # Create zipfile from sources
        """)
    if args is None:
        args = sys.argv[1:]

    if not args or args[0] not in ('-l', '-c', '-e', '-t'):
        print(USAGE)
        sys.exit(1)

    if args[0] == '-l':
        if len(args) != 2:
            print(USAGE)
            sys.exit(1)
        zf = ZipFile(args[1], 'r')
        zf.printdir()
        zf.close()

    elif args[0] == '-t':
        if len(args) != 2:
            print(USAGE)
            sys.exit(1)
        zf = ZipFile(args[1], 'r')
        # testzip() returns the name of the first bad member, if any;
        # report it instead of silently discarding the result.
        badfile = zf.testzip()
        zf.close()
        if badfile:
            print("The following enclosed file is corrupted: %r" % (badfile,))
        print("Done testing")

    elif args[0] == '-e':
        if len(args) != 3:
            print(USAGE)
            sys.exit(1)

        zf = ZipFile(args[1], 'r')
        out = args[2]
        for path in zf.namelist():
            if path.startswith('./'):
                tgt = os.path.join(out, path[2:])
            else:
                tgt = os.path.join(out, path)

            tgtdir = os.path.dirname(tgt)
            # tgtdir is empty when extracting a top-level member into "".
            if tgtdir and not os.path.exists(tgtdir):
                os.makedirs(tgtdir)
            if path.endswith('/'):
                # Directory entry: it has no data and was created above.
                continue
            fp = open(tgt, 'wb')
            try:
                fp.write(zf.read(path))
            finally:
                fp.close()
        zf.close()

    elif args[0] == '-c':
        if len(args) < 3:
            print(USAGE)
            sys.exit(1)

        def addToZip(zf, path, zippath):
            # Add a file, or recursively every file below a directory.
            if os.path.isfile(path):
                zf.write(path, zippath, ZIP_DEFLATED)
            elif os.path.isdir(path):
                for nm in os.listdir(path):
                    addToZip(zf,
                             os.path.join(path, nm), os.path.join(zippath, nm))
            # else: ignore

        zf = ZipFile(args[1], 'w', allowZip64=True)
        for src in args[2:]:
            addToZip(zf, src, os.path.basename(src))

        zf.close()


if __name__ == "__main__":
    main()