blob: b274682e7042d2b32ada927aba83ba207de6ca77 [file] [log] [blame]
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001"""
2Read and write ZIP files.
Guido van Rossumd6ca5462007-05-22 01:29:33 +00003
4XXX references to utf-8 need further investigation.
Thomas Wouters0e3f5912006-08-11 14:57:12 +00005"""
Martin v. Löwis00756902006-02-05 17:09:41 +00006import struct, os, time, sys
Guido van Rossum68937b42007-05-18 00:51:22 +00007import binascii, io
Guido van Rossum32abe6f2000-03-31 17:30:02 +00008
9try:
Tim Peterse1190062001-01-15 03:34:38 +000010 import zlib # We may need its compression method
Guido van Rossum9c673f32001-04-10 15:37:12 +000011except ImportError:
Guido van Rossum32abe6f2000-03-31 17:30:02 +000012 zlib = None
13
Skip Montanaro40fc1602001-03-01 04:27:19 +000014__all__ = ["BadZipfile", "error", "ZIP_STORED", "ZIP_DEFLATED", "is_zipfile",
Thomas Wouters0e3f5912006-08-11 14:57:12 +000015 "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile" ]
Skip Montanaro40fc1602001-03-01 04:27:19 +000016
class BadZipfile(Exception):
    """Raised when a file is not a usable ZIP archive.

    This module raises it when no end-of-archive record can be found, when
    a central directory entry has a bad magic number, and for archives that
    span multiple disks.
    """
    pass
Thomas Wouters0e3f5912006-08-11 14:57:12 +000019
20
class LargeZipFile(Exception):
    """
    Raised when writing a zipfile that requires ZIP64 extensions
    while those extensions are disabled.
    """
26
error = BadZipfile      # The exception raised by this module

# Largest value that fits in a signed 32-bit integer.  Sizes above this
# limit are written through the ZIP64 extra field (see ZipInfo.FileHeader).
ZIP64_LIMIT= (1 << 31) - 1

# constants for Zip file compression methods
ZIP_STORED = 0
ZIP_DEFLATED = 8
# Other ZIP compression methods not supported

# Here are some struct module formats for reading headers
# NOTE: "<4s4H2lH" unpacks to 8 values; the archive comment that follows the
# record in the file is not part of this struct.
structEndArchive = "<4s4H2lH"     # 8 items, end of archive, 22 bytes
stringEndArchive = b"PK\005\006"  # magic number for end of archive record
structCentralDir = "<4s4B4HlLL5HLl"# 19 items, central directory, 46 bytes
stringCentralDir = b"PK\001\002"  # magic number for central directory
structFileHeader = "<4s2B4HlLL2H" # 12 items, file header record, 30 bytes
stringFileHeader = b"PK\003\004"  # magic number for file header
structEndArchive64Locator = "<4slql" # 4 items, locate Zip64 header, 20 bytes
stringEndArchive64Locator = b"PK\x06\x07" # magic token for locator header
structEndArchive64 = "<4sqhhllqqqq" # 10 items, end of archive (Zip64), 56 bytes
stringEndArchive64 = b"PK\x06\x06" # magic token for Zip64 header


# indexes of entries in the central directory structure
# (positions in the tuple returned by struct.unpack(structCentralDir, ...))
_CD_SIGNATURE = 0
_CD_CREATE_VERSION = 1
_CD_CREATE_SYSTEM = 2
_CD_EXTRACT_VERSION = 3
_CD_EXTRACT_SYSTEM = 4                  # is this meaningful?
_CD_FLAG_BITS = 5
_CD_COMPRESS_TYPE = 6
_CD_TIME = 7
_CD_DATE = 8
_CD_CRC = 9
_CD_COMPRESSED_SIZE = 10
_CD_UNCOMPRESSED_SIZE = 11
_CD_FILENAME_LENGTH = 12
_CD_EXTRA_FIELD_LENGTH = 13
_CD_COMMENT_LENGTH = 14
_CD_DISK_NUMBER_START = 15
_CD_INTERNAL_FILE_ATTRIBUTES = 16
_CD_EXTERNAL_FILE_ATTRIBUTES = 17
_CD_LOCAL_HEADER_OFFSET = 18

# indexes of entries in the local file header structure
# (positions in the tuple returned by struct.unpack(structFileHeader, ...))
_FH_SIGNATURE = 0
_FH_EXTRACT_VERSION = 1
_FH_EXTRACT_SYSTEM = 2                  # is this meaningful?
_FH_GENERAL_PURPOSE_FLAG_BITS = 3
_FH_COMPRESSION_METHOD = 4
_FH_LAST_MOD_TIME = 5
_FH_LAST_MOD_DATE = 6
_FH_CRC = 7
_FH_COMPRESSED_SIZE = 8
_FH_UNCOMPRESSED_SIZE = 9
_FH_FILENAME_LENGTH = 10
_FH_EXTRA_FIELD_LENGTH = 11
83
def is_zipfile(filename):
    """Quickly see if file is a ZIP file by checking the magic number.

    filename -- path of the file to inspect.

    Returns True when an "End of Central Directory" record can be located
    in the file, False otherwise.  An IOError while opening or reading the
    file is treated as "not a ZIP file" rather than propagated.
    """
    try:
        # The with-block closes the handle even when scanning for the
        # record raises, so no file descriptor is leaked on error.
        with io.open(filename, "rb") as fpin:
            endrec = _EndRecData(fpin)
    except IOError:
        return False
    return bool(endrec)         # a record was found: correct magic number
Guido van Rossum32abe6f2000-03-31 17:30:02 +000095
def _EndRecData64(fpin, offset, endrec):
    """
    Read the ZIP64 end-of-archive records and use that to update endrec

    fpin   -- seekable binary file object for the archive.
    offset -- position of the classic end-of-archive record, expressed
              relative to the end of the file (whence=2).
    endrec -- list built by _EndRecData(); items 1-6 are overwritten in
              place with the values from the ZIP64 record.

    Returns endrec.  It is returned unchanged when no ZIP64 locator or
    ZIP64 end-of-archive record is present where expected, including when
    the file is too short to contain them.

    Raises BadZipfile for archives that span multiple disks.
    """
    locatorSize = struct.calcsize(structEndArchive64Locator)
    try:
        fpin.seek(offset - locatorSize, 2)
    except IOError:
        # The file is too small to hold a locator before the record.
        return endrec
    data = fpin.read(locatorSize)
    if len(data) != locatorSize:
        # Short read: unpacking would raise struct.error.
        return endrec
    sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, data)
    if sig != stringEndArchive64Locator:
        return endrec

    if diskno != 0 or disks != 1:
        raise BadZipfile("zipfiles that span multiple disks are not supported")

    # Assume no 'zip64 extensible data'
    endArchiveSize = struct.calcsize(structEndArchive64)
    try:
        fpin.seek(offset - locatorSize - endArchiveSize, 2)
    except IOError:
        return endrec
    data = fpin.read(endArchiveSize)
    if len(data) != endArchiveSize:
        return endrec
    sig, sz, create_version, read_version, disk_num, disk_dir, \
            dircount, dircount2, dirsize, diroffset = \
            struct.unpack(structEndArchive64, data)
    if sig != stringEndArchive64:
        return endrec

    # Update the original endrec using data from the ZIP64 record
    endrec[1] = disk_num
    endrec[2] = disk_dir
    endrec[3] = dircount
    endrec[4] = dircount2
    endrec[5] = dirsize
    endrec[6] = diroffset
    return endrec
128
129
def _EndRecData(fpin):
    """Return data from the "End of Central Directory" record, or None.

    fpin -- seekable binary file object for the archive.

    The result is a list holding the eight fields unpacked from the ZIP
    "End of central dir" record, followed by the archive comment (bytes)
    at index 8 and the file offset of the record at index 9.  When the
    central directory offset holds the ZIP64 marker value, the list is
    passed through _EndRecData64() before being returned.

    None is returned when no valid record is found, including when the
    file is shorter than one record.
    """
    recordSize = struct.calcsize(structEndArchive)

    # Determine the file size first; seeking to a negative offset from the
    # end raises IOError on files shorter than one record.
    fpin.seek(0, 2)
    filesize = fpin.tell()
    if filesize < recordSize:
        return None

    # Fast path: assume no archive comment, so the record is the file tail.
    fpin.seek(filesize - recordSize, 0)
    data = fpin.read()
    if (len(data) == recordSize and data[0:4] == stringEndArchive
            and data[-2:] == b"\000\000"):
        endrec = list(struct.unpack(structEndArchive, data))
        endrec.append(b"")                      # Append the archive comment
        endrec.append(filesize - recordSize)    # Append the record start offset
        if endrec[-4] == -1 or endrec[-4] == 0xffffffff:
            return _EndRecData64(fpin, -recordSize, endrec)
        return endrec

    # Slow path: the comment is appended to the ZIP file and has a 16 bit
    # length, so the record starts at most 64K + one record before the end
    # of the file.  Search that tail for the last occurrence of the record
    # signature; the signature must not appear in the comment.
    searchStart = max(filesize - (1 << 16) - recordSize, 0)
    fpin.seek(searchStart, 0)
    data = fpin.read()
    start = data.rfind(stringEndArchive)
    if start >= 0:              # Correct signature string was found
        recdata = data[start:start + recordSize]
        if len(recdata) != recordSize:
            # Signature too close to the end of the file for a full record.
            return None
        endrec = list(struct.unpack(structEndArchive, recdata))
        comment = data[start + recordSize:]
        if endrec[7] == len(comment):   # Comment length checks out
            # Append the archive comment and start offset
            endrec.append(comment)
            endrec.append(searchStart + start)
            if endrec[-4] == -1 or endrec[-4] == 0xffffffff:
                # _EndRecData64 expects an offset relative to end of file.
                return _EndRecData64(fpin, searchStart + start - filesize,
                                     endrec)
            return endrec
    return None                 # Error, return None
167
Fred Drake484d7352000-10-02 21:14:52 +0000168
class ZipInfo (object):
    """Class with attributes describing each file in the ZIP archive."""

    __slots__ = (
            'orig_filename',
            'filename',
            'date_time',
            'compress_type',
            'comment',
            'extra',
            'create_system',
            'create_version',
            'extract_version',
            'reserved',
            'flag_bits',
            'volume',
            'internal_attr',
            'external_attr',
            'header_offset',
            'CRC',
            'compress_size',
            'file_size',
        )

    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
        """Create an entry description with default attribute values.

        filename  -- member name (str); it is cut at the first null
                     character and os.sep is replaced by "/".
        date_time -- (year, month, day, hour, min, sec) tuple.
        """
        self.orig_filename = filename   # Original file name in archive

        # Terminate the file name at the first null byte.  Null bytes in file
        # names are used as tricks by viruses in archives.
        null_byte = filename.find(chr(0))
        if null_byte >= 0:
            filename = filename[0:null_byte]
        # This is used to ensure paths in generated ZIP files always use
        # forward slashes as the directory separator, as required by the
        # ZIP format specification.
        if os.sep != "/" and os.sep in filename:
            filename = filename.replace(os.sep, "/")

        self.filename = filename        # Normalized file name
        self.date_time = date_time      # year, month, day, hour, min, sec
        # Standard values:
        self.compress_type = ZIP_STORED # Type of compression for the file
        self.comment = b""              # Comment for each file
        self.extra = b""                # ZIP extra data
        if sys.platform == 'win32':
            self.create_system = 0      # System which created ZIP archive
        else:
            # Assume everything else is unix-y
            self.create_system = 3      # System which created ZIP archive
        self.create_version = 20        # Version which created ZIP archive
        self.extract_version = 20       # Version needed to extract archive
        self.reserved = 0               # Must be zero
        self.flag_bits = 0              # ZIP flag bits
        self.volume = 0                 # Volume number of file header
        self.internal_attr = 0          # Internal attributes
        self.external_attr = 0          # External file attributes
        # Other attributes are set by class ZipFile:
        # header_offset         Byte offset to the file header
        # CRC                   CRC-32 of the uncompressed file
        # compress_size         Size of the compressed file
        # file_size             Size of the uncompressed file

    def FileHeader(self):
        """Return the per-file header as bytes.

        The result is the packed local file header followed by the UTF-8
        encoded file name and the extra data.  When either size exceeds
        ZIP64_LIMIT, a ZIP64 extra record carrying the real sizes is
        appended, the 32-bit size fields are set to 0xffffffff, and
        extract_version / create_version are raised to at least 45.
        """
        dt = self.date_time
        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
        if self.flag_bits & 0x08:
            # Set these to zero because we write them after the file data
            CRC = compress_size = file_size = 0
        else:
            CRC = self.CRC
            compress_size = self.compress_size
            file_size = self.file_size

        extra = self.extra

        if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
            # File is larger than what fits into a 4 byte integer,
            # fall back to the ZIP64 extension
            fmt = '<hhqq'
            extra = extra + struct.pack(fmt,
                    1, struct.calcsize(fmt)-4, file_size, compress_size)
            file_size = 0xffffffff # -1
            compress_size = 0xffffffff # -1
            self.extract_version = max(45, self.extract_version)
            self.create_version = max(45, self.extract_version)

        # Encode first: the header must record the number of *bytes* that
        # follow it, which differs from len(str) for non-ASCII names.
        filename = self.filename.encode("utf-8")
        header = struct.pack(structFileHeader, stringFileHeader,
                 self.extract_version, self.reserved, self.flag_bits,
                 self.compress_type, dostime, dosdate, CRC,
                 compress_size, file_size,
                 len(filename), len(extra))
        return header + filename + extra

    def _decodeExtra(self):
        """Decode the extra field and apply any ZIP64 record it contains.

        self.extra is walked as a sequence of (type, length, payload)
        records.  For a type 1 (ZIP64) record, each of file_size,
        compress_size and header_offset that currently holds the marker
        value (-1 or 0xffffffff) is replaced, in that order, by the next
        64-bit value from the record.

        Raises RuntimeError when a ZIP64 record has an unexpected length.
        """
        extra = self.extra
        unpack = struct.unpack
        # Stop when fewer than 4 bytes remain: that cannot be a record header.
        while len(extra) >= 4:
            # Type and length are unsigned; reading them as signed would turn
            # lengths >= 0x8000 into negative slice offsets below.
            tp, ln = unpack('<HH', extra[:4])
            if tp == 1:
                if ln >= 24:
                    counts = unpack('<qqq', extra[4:28])
                elif ln == 16:
                    counts = unpack('<qq', extra[4:20])
                elif ln == 8:
                    counts = unpack('<q', extra[4:12])
                elif ln == 0:
                    counts = ()
                else:
                    raise RuntimeError("Corrupt extra field %s"%(ln,))

                idx = 0

                # ZIP64 extension (large files and/or large archives)
                if self.file_size == -1 or self.file_size == 0xFFFFFFFF:
                    self.file_size = counts[idx]
                    idx += 1

                if self.compress_size == -1 or self.compress_size == 0xFFFFFFFF:
                    self.compress_size = counts[idx]
                    idx += 1

                if self.header_offset == -1 or self.header_offset == 0xffffffff:
                    self.header_offset = counts[idx]
                    idx += 1

            # Skip the 4-byte record header plus its payload.
            extra = extra[ln+4:]
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000299
300
Thomas Wouterscf297e42007-02-23 15:07:44 +0000301class _ZipDecrypter:
302 """Class to handle decryption of files stored within a ZIP archive.
303
304 ZIP supports a password-based form of encryption. Even though known
305 plaintext attacks have been found against it, it is still useful
306 for low-level securicy.
307
308 Usage:
309 zd = _ZipDecrypter(mypwd)
310 plain_char = zd(cypher_char)
311 plain_text = map(zd, cypher_text)
312 """
313
314 def _GenerateCRCTable():
315 """Generate a CRC-32 table.
316
317 ZIP encryption uses the CRC32 one-byte primitive for scrambling some
318 internal keys. We noticed that a direct implementation is faster than
319 relying on binascii.crc32().
320 """
321 poly = 0xedb88320
322 table = [0] * 256
323 for i in range(256):
324 crc = i
325 for j in range(8):
326 if crc & 1:
327 crc = ((crc >> 1) & 0x7FFFFFFF) ^ poly
328 else:
329 crc = ((crc >> 1) & 0x7FFFFFFF)
330 table[i] = crc
331 return table
332 crctable = _GenerateCRCTable()
333
334 def _crc32(self, ch, crc):
335 """Compute the CRC32 primitive on one byte."""
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000336 return ((crc >> 8) & 0xffffff) ^ self.crctable[(crc ^ ch) & 0xff]
Thomas Wouterscf297e42007-02-23 15:07:44 +0000337
338 def __init__(self, pwd):
339 self.key0 = 305419896
340 self.key1 = 591751049
341 self.key2 = 878082192
342 for p in pwd:
343 self._UpdateKeys(p)
344
345 def _UpdateKeys(self, c):
346 self.key0 = self._crc32(c, self.key0)
347 self.key1 = (self.key1 + (self.key0 & 255)) & 4294967295
348 self.key1 = (self.key1 * 134775813 + 1) & 4294967295
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000349 self.key2 = self._crc32((self.key1 >> 24) & 255, self.key2)
Thomas Wouterscf297e42007-02-23 15:07:44 +0000350
351 def __call__(self, c):
352 """Decrypt a single character."""
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000353 assert isinstance(c, int)
Thomas Wouterscf297e42007-02-23 15:07:44 +0000354 k = self.key2 | 2
355 c = c ^ (((k * (k^1)) >> 8) & 255)
Thomas Wouterscf297e42007-02-23 15:07:44 +0000356 self._UpdateKeys(c)
357 return c
358
class ZipExtFile:
    """File-like object for reading an archive member.

    Is returned by ZipFile.open().  Data is pulled from the underlying
    file object on demand, optionally decrypted, decompressed when the
    member uses ZIP_DEFLATED, and handed out through read(), readline(),
    readlines() or iteration.
    """

    def __init__(self, fileobj, zipinfo, decrypt=None):
        """Prepare to read one member.

        fileobj -- binary file object to read the member's raw bytes from.
        zipinfo -- object providing compress_type, compress_size and
                   filename for the member.
        decrypt -- optional callable mapping one cipher byte value to one
                   plain byte value; None means the data is not encrypted.
        """
        self.fileobj = fileobj
        self.decrypter = decrypt
        self.bytes_read = 0             # raw bytes consumed from fileobj
        self.rawbuffer = b''            # raw bytes not yet decompressed
        self.readbuffer = b''           # plain bytes not yet returned
        self.linebuffer = b''           # plain bytes held back by readline()
        self.eof = False                # True once all raw bytes were read
        self.univ_newlines = False
        self.nlSeps = (b"\n", )
        self.lastdiscard = b''          # last line separator readline() removed

        self.compress_type = zipinfo.compress_type
        self.compress_size = zipinfo.compress_size

        self.closed = False
        self.mode = "r"
        self.name = zipinfo.filename

        # read from compressed files in 64k blocks
        self.compreadsize = 64*1024
        if self.compress_type == ZIP_DEFLATED:
            self.dc = zlib.decompressobj(-15)

    def set_univ_newlines(self, univ_newlines):
        """Enable or disable universal newline handling for readline()."""
        self.univ_newlines = univ_newlines

        # pick line separator char(s) based on universal newlines flag
        self.nlSeps = (b"\n", )
        if self.univ_newlines:
            self.nlSeps = (b"\r\n", b"\r", b"\n")

    def __iter__(self):
        return self

    def __next__(self):
        """Return the next line; raise StopIteration when none is left."""
        nextline = self.readline()
        if not nextline:
            raise StopIteration()

        return nextline

    def close(self):
        """Mark the object as closed; the underlying file is left alone."""
        self.closed = True

    def _checkfornewline(self):
        """Look for a line separator in the line buffer.

        Returns (index, separator length), or (-1, -1) when the buffer
        contains none of the separators in self.nlSeps.
        """
        nl, nllen = -1, -1
        if self.linebuffer:
            # ugly check for cases where half of an \r\n pair was
            # read on the last pass, and the \r was discarded.  In this
            # case we just throw away the \n at the start of the buffer.
            if (self.lastdiscard, self.linebuffer[:1]) == (b'\r', b'\n'):
                self.linebuffer = self.linebuffer[1:]

            for sep in self.nlSeps:
                nl = self.linebuffer.find(sep)
                if nl >= 0:
                    nllen = len(sep)
                    return nl, nllen

        return nl, nllen

    def readline(self, size = -1):
        """Read a line with approx. size. If size is negative,
           read a whole line.

        A complete line is returned terminated by b"\\n" whatever
        separator was found.  When the size limit is hit before the line
        ends, the bytes read so far are returned without a terminator.
        """
        if size < 0:
            size = sys.maxsize
        elif size == 0:
            return b''

        # check for a newline already in buffer
        nl, nllen = self._checkfornewline()

        if nl >= 0:
            # the next line was already in the buffer
            if size < nl:
                # The limit falls inside the line: hand out exactly `size`
                # bytes and keep the rest.  Pretending a separator sat at
                # `size` would append a newline that is not in the data and
                # throw away the bytes where the fake separator "was".
                chunk = self.linebuffer[:size]
                self.linebuffer = self.linebuffer[size:]
                self.lastdiscard = b''
                return chunk
        else:
            # no line break in buffer - try to read more
            size -= len(self.linebuffer)
            while nl < 0 and size > 0:
                buf = self.read(min(size, 100))
                if not buf:
                    break
                self.linebuffer += buf
                size -= len(buf)

                # check for a newline in buffer
                nl, nllen = self._checkfornewline()

            # we either ran out of bytes in the file, or
            # met the specified size limit without finding a newline,
            # so return current buffer
            if nl < 0:
                s = self.linebuffer
                self.linebuffer = b''
                return s

        buf = self.linebuffer[:nl]
        self.lastdiscard = self.linebuffer[nl:nl + nllen]
        self.linebuffer = self.linebuffer[nl + nllen:]

        # line is always returned with \n as newline char (except possibly
        # for a final incomplete line in the file, which is handled above).
        return buf + b"\n"

    def readlines(self, sizehint = -1):
        """Return a list with all (following) lines. The sizehint parameter
        is ignored in this implementation.
        """
        result = []
        while True:
            line = self.readline()
            if not line: break
            result.append(line)
        return result

    def read(self, size = None):
        """Return up to `size` bytes of member data.

        With size None or negative, everything that can be read is
        returned.  With size 0 an empty bytes object is returned.
        """
        # A negative size means "no limit", like None.  Left as a number it
        # would reach the final slice and silently drop the last byte.
        if size is not None and size < 0:
            size = None

        # act like file obj and return empty string if size is 0
        if size == 0:
            return b''

        # determine read size
        bytesToRead = self.compress_size - self.bytes_read

        # adjust read size for encrypted files since the first 12 bytes
        # are for the encryption/password information
        if self.decrypter is not None:
            bytesToRead -= 12

        # raw bytes of this member that have not been read yet
        bytesLeft = bytesToRead

        if size is not None:
            if self.compress_type == ZIP_STORED:
                lr = len(self.readbuffer)
                bytesToRead = min(bytesToRead, size - lr)
            elif self.compress_type == ZIP_DEFLATED:
                if len(self.readbuffer) > size:
                    # the user has requested fewer bytes than we've already
                    # pulled through the decompressor; don't read any more
                    bytesToRead = 0
                else:
                    # user will use up the buffer, so read some more
                    lr = len(self.rawbuffer)
                    bytesToRead = min(bytesToRead, self.compreadsize - lr)

        # avoid reading past end of file contents
        if bytesToRead + self.bytes_read > self.compress_size:
            bytesToRead = self.compress_size - self.bytes_read

        # try to read from file (if necessary)
        if bytesToRead > 0:
            data = self.fileobj.read(bytesToRead)
            self.bytes_read += len(data)
            self.rawbuffer += data
            if len(data) >= bytesLeft:
                # Every raw byte of the member has been consumed; this lets
                # the decompressor be flushed below.
                self.eof = True

        # handle contents of raw buffer
        if self.rawbuffer:
            newdata = self.rawbuffer
            self.rawbuffer = b''

            # decrypt new data if we were given an object to handle that
            if newdata and self.decrypter is not None:
                newdata = bytes(map(self.decrypter, newdata))

            # decompress newly read data if necessary
            if newdata and self.compress_type == ZIP_DEFLATED:
                newdata = self.dc.decompress(newdata)
                self.rawbuffer = self.dc.unconsumed_tail
                if self.eof and len(self.rawbuffer) == 0:
                    # we're out of raw bytes (both from the file and
                    # the local buffer); flush just to make sure the
                    # decompressor is done
                    newdata += self.dc.flush()
                    # prevent decompressor from being used again
                    self.dc = None

            self.readbuffer += newdata

        # return what the user asked for
        if size is None or len(self.readbuffer) <= size:
            data = self.readbuffer
            self.readbuffer = b''
        else:
            data = self.readbuffer[:size]
            self.readbuffer = self.readbuffer[size:]

        return data
Guido van Rossumd8faa362007-04-27 19:54:29 +0000556
557
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000558class ZipFile:
Tim Petersa19a1682001-03-29 04:36:09 +0000559 """ Class with methods to open, read, write, close, list zip files.
560
    z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=False)
Tim Petersa19a1682001-03-29 04:36:09 +0000562
Fred Drake3d9091e2001-03-26 15:49:24 +0000563 file: Either the path to the file, or a file-like object.
564 If it is a path, the file will be opened and closed by ZipFile.
565 mode: The mode can be either read "r", write "w" or append "a".
566 compression: ZIP_STORED (no compression) or ZIP_DEFLATED (requires zlib).
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000567 allowZip64: if True ZipFile will create files with ZIP64 extensions when
568 needed, otherwise it will raise an exception when this would
569 be necessary.
570
Fred Drake3d9091e2001-03-26 15:49:24 +0000571 """
Fred Drake484d7352000-10-02 21:14:52 +0000572
Fred Drake90eac282001-02-28 05:29:34 +0000573 fp = None # Set here since __del__ checks it
574
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000575 def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=False):
Fred Drake484d7352000-10-02 21:14:52 +0000576 """Open the ZIP file with mode read "r", write "w" or append "a"."""
Guido van Rossumb5a755e2007-07-18 18:15:48 +0000577 if mode not in ("r", "w", "a"):
578 raise RuntimeError('ZipFile() requires mode "r", "w", or "a"')
579
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000580 if compression == ZIP_STORED:
581 pass
582 elif compression == ZIP_DEFLATED:
583 if not zlib:
Collin Winterce36ad82007-08-30 01:19:48 +0000584 raise RuntimeError(
585 "Compression requires the (missing) zlib module")
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000586 else:
Collin Winterce36ad82007-08-30 01:19:48 +0000587 raise RuntimeError("That compression method is not supported")
Guido van Rossumb5a755e2007-07-18 18:15:48 +0000588
589 self._allowZip64 = allowZip64
590 self._didModify = False
Tim Peterse1190062001-01-15 03:34:38 +0000591 self.debug = 0 # Level of printing: 0 through 3
592 self.NameToInfo = {} # Find file info given name
593 self.filelist = [] # List of ZipInfo instances for archive
594 self.compression = compression # Method of compression
Raymond Hettinger2ca7c192005-02-16 09:27:49 +0000595 self.mode = key = mode.replace('b', '')[0]
Thomas Wouterscf297e42007-02-23 15:07:44 +0000596 self.pwd = None
Tim Petersa19a1682001-03-29 04:36:09 +0000597
Fred Drake3d9091e2001-03-26 15:49:24 +0000598 # Check if we were passed a file-like object
Guido van Rossum3172c5d2007-10-16 18:12:55 +0000599 if isinstance(file, str):
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000600 # No, it's a filename
Fred Drake3d9091e2001-03-26 15:49:24 +0000601 self._filePassed = 0
602 self.filename = file
603 modeDict = {'r' : 'rb', 'w': 'wb', 'a' : 'r+b'}
Thomas Wouterscf297e42007-02-23 15:07:44 +0000604 try:
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000605 self.fp = io.open(file, modeDict[mode])
Thomas Wouterscf297e42007-02-23 15:07:44 +0000606 except IOError:
607 if mode == 'a':
608 mode = key = 'w'
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000609 self.fp = io.open(file, modeDict[mode])
Thomas Wouterscf297e42007-02-23 15:07:44 +0000610 else:
611 raise
Fred Drake3d9091e2001-03-26 15:49:24 +0000612 else:
613 self._filePassed = 1
614 self.fp = file
615 self.filename = getattr(file, 'name', None)
Tim Petersa19a1682001-03-29 04:36:09 +0000616
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000617 if key == 'r':
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000618 self._GetContents()
619 elif key == 'w':
Fred Drake3d9091e2001-03-26 15:49:24 +0000620 pass
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000621 elif key == 'a':
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000622 try: # See if file is a zip file
623 self._RealGetContents()
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000624 # seek to start of directory and overwrite
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000625 self.fp.seek(self.start_dir, 0)
626 except BadZipfile: # file is not a zip file, just append
627 self.fp.seek(0, 2)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000628 else:
Tim Peters7d3bad62001-04-04 18:56:49 +0000629 if not self._filePassed:
630 self.fp.close()
631 self.fp = None
Collin Winterce36ad82007-08-30 01:19:48 +0000632 raise RuntimeError('Mode must be "r", "w" or "a"')
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000633
634 def _GetContents(self):
Tim Peters7d3bad62001-04-04 18:56:49 +0000635 """Read the directory, making sure we close the file if the format
636 is bad."""
637 try:
638 self._RealGetContents()
639 except BadZipfile:
640 if not self._filePassed:
641 self.fp.close()
642 self.fp = None
643 raise
644
645 def _RealGetContents(self):
Fred Drake484d7352000-10-02 21:14:52 +0000646 """Read in the table of contents for the ZIP file."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000647 fp = self.fp
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000648 endrec = _EndRecData(fp)
649 if not endrec:
Collin Winterce36ad82007-08-30 01:19:48 +0000650 raise BadZipfile("File is not a zip file")
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000651 if self.debug > 1:
Guido van Rossumbe19ed72007-02-09 05:37:30 +0000652 print(endrec)
Tim Peterse1190062001-01-15 03:34:38 +0000653 size_cd = endrec[5] # bytes in central directory
654 offset_cd = endrec[6] # offset of central directory
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000655 self.comment = endrec[8] # archive comment
656 # endrec[9] is the offset of the "End of Central Dir" record
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000657 if endrec[9] > ZIP64_LIMIT:
658 x = endrec[9] - size_cd - 56 - 20
659 else:
660 x = endrec[9] - size_cd
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000661 # "concat" is zero, unless zip was concatenated to another file
662 concat = x - offset_cd
663 if self.debug > 2:
Guido van Rossumbe19ed72007-02-09 05:37:30 +0000664 print("given, inferred, offset", offset_cd, x, concat)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000665 # self.start_dir: Position of start of central directory
666 self.start_dir = offset_cd + concat
667 fp.seek(self.start_dir, 0)
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000668 data = fp.read(size_cd)
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000669 fp = io.BytesIO(data)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000670 total = 0
671 while total < size_cd:
672 centdir = fp.read(46)
673 total = total + 46
674 if centdir[0:4] != stringCentralDir:
Collin Winterce36ad82007-08-30 01:19:48 +0000675 raise BadZipfile("Bad magic number for central directory")
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000676 centdir = struct.unpack(structCentralDir, centdir)
677 if self.debug > 2:
Guido van Rossumbe19ed72007-02-09 05:37:30 +0000678 print(centdir)
Fred Drake3e038e52001-02-28 17:56:26 +0000679 filename = fp.read(centdir[_CD_FILENAME_LENGTH])
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000680 # Create ZipInfo instance to store file information
Guido van Rossum98297ee2007-11-06 21:34:58 +0000681 x = ZipInfo(filename.decode("utf-8"))
Fred Drake3e038e52001-02-28 17:56:26 +0000682 x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
683 x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
684 total = (total + centdir[_CD_FILENAME_LENGTH]
685 + centdir[_CD_EXTRA_FIELD_LENGTH]
686 + centdir[_CD_COMMENT_LENGTH])
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000687 x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000688 (x.create_version, x.create_system, x.extract_version, x.reserved,
689 x.flag_bits, x.compress_type, t, d,
690 x.CRC, x.compress_size, x.file_size) = centdir[1:12]
691 x.volume, x.internal_attr, x.external_attr = centdir[15:18]
692 # Convert date/time code to (year, month, day, hour, min, sec)
693 x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
Fred Drake414ca662000-06-13 18:49:53 +0000694 t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000695
696 x._decodeExtra()
697 x.header_offset = x.header_offset + concat
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000698 self.filelist.append(x)
699 self.NameToInfo[x.filename] = x
700 if self.debug > 2:
Guido van Rossumbe19ed72007-02-09 05:37:30 +0000701 print("total", total)
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000702
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000703
def namelist(self):
    """Return the member names of the archive as a new list.

    Names appear in the same order as the entries of ``self.filelist``.
    """
    return [member.filename for member in self.filelist]
710
def infolist(self):
    """Return the list of ZipInfo instances for the archive's members.

    The object returned is the archive's own ``filelist``, not a copy.
    """
    members = self.filelist
    return members
715
def printdir(self, file=None):
    """Print a table of contents for the zip file.

    A header row is printed first, then one row per member giving its
    name, modification time and uncompressed size.  'file' is handed to
    print() unchanged, so None selects print()'s default stream.
    """
    header = "%-46s %19s %12s" % ("File Name", "Modified ", "Size")
    print(header, file=file)
    for member in self.filelist:
        # date_time is a six-item (year, month, day, hour, min, sec) tuple.
        stamp = "%d-%02d-%02d %02d:%02d:%02d" % member.date_time
        row = "%-46s %s %12d" % (member.filename, stamp, member.file_size)
        print(row, file=file)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000724
725 def testzip(self):
Fred Drake484d7352000-10-02 21:14:52 +0000726 """Read all the files and check the CRC."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000727 for zinfo in self.filelist:
728 try:
Tim Peterse1190062001-01-15 03:34:38 +0000729 self.read(zinfo.filename) # Check CRC-32
Raymond Hettingerc0fac962003-06-27 22:25:03 +0000730 except BadZipfile:
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000731 return zinfo.filename
732
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000733
def getinfo(self, name):
    """Return the ZipInfo instance registered under 'name'.

    Raises KeyError when the archive has no member with that name.
    """
    found = self.NameToInfo.get(name)
    if found is not None:
        return found
    raise KeyError('There is no item named %r in the archive' % name)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000742
def setpassword(self, pwd):
    """Set default password for encrypted files.

    'pwd' must be a bytes object, or None to clear the default password.

    Raises TypeError for any other type.
    """
    # Validate explicitly: an assert would be stripped under "python -O",
    # letting a str password through to fail later during decryption.
    if pwd is not None and not isinstance(pwd, bytes):
        raise TypeError("pwd: expected bytes, got %s" % type(pwd).__name__)
    self.pwd = pwd
747
def read(self, name, pwd=None):
    """Return the full contents of member 'name'.

    The member is opened through self.open() in mode "r" with the given
    'pwd', and everything that object's read() yields is returned.
    """
    member = self.open(name, "r", pwd)
    return member.read()
751
def open(self, name, mode="r", pwd=None):
    """Return file-like object for 'name'.

    'mode' must be "r", "U" or "rU"; a mode containing "U" turns on
    universal newlines on the returned object.  'pwd' is the password
    for an encrypted member; when empty, the archive's default password
    (self.pwd) is used instead.

    Raises RuntimeError for an unsupported mode, a closed archive, a
    missing password or a wrong password; KeyError (from getinfo) when
    'name' is not in the archive; BadZipfile when the member's local
    header is malformed or disagrees with the central directory.
    """
    if mode not in ("r", "U", "rU"):
        raise RuntimeError('open() requires mode "r", "U", or "rU"')
    if not self.fp:
        raise RuntimeError(
            "Attempt to read ZIP archive that was already closed")

    # Only open a new file for instances where we were not
    # given a file object in the constructor
    if self._filePassed:
        zef_file = self.fp
    else:
        zef_file = io.open(self.filename, 'rb')

    try:
        # Get info object for name
        zinfo = self.getinfo(name)

        zef_file.seek(zinfo.header_offset, 0)

        # Skip the file header:
        fheader = zef_file.read(30)
        if fheader[0:4] != stringFileHeader:
            raise BadZipfile("Bad magic number for file header")

        fheader = struct.unpack(structFileHeader, fheader)
        fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
        if fheader[_FH_EXTRA_FIELD_LENGTH]:
            zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])

        if fname != zinfo.orig_filename.encode("utf-8"):
            raise BadZipfile(
                'File name in directory %r and header %r differ.'
                % (zinfo.orig_filename, fname))

        # check for encrypted flag & handle password
        zd = None
        if zinfo.flag_bits & 0x1:
            if not pwd:
                pwd = self.pwd
            if not pwd:
                raise RuntimeError("File %s is encrypted, "
                                   "password required for extraction" % name)

            zd = _ZipDecrypter(pwd)
            # The first 12 bytes in the cypher stream is an encryption
            # header used to strengthen the algorithm.  The first 11 bytes
            # are completely random, while the 12th contains the MSB of
            # the CRC, and is used to check the correctness of the password.
            crypt_header = zef_file.read(12)
            h = list(map(zd, crypt_header[0:12]))
            if h[11] != ((zinfo.CRC >> 24) & 255):
                raise RuntimeError("Bad password for file %s" % name)

        # build the ZipExtFile
        if zd is None:
            zef = ZipExtFile(zef_file, zinfo)
        else:
            zef = ZipExtFile(zef_file, zinfo, zd)

        # set universal newlines on ZipExtFile if necessary
        if "U" in mode:
            zef.set_univ_newlines(True)
    except BaseException:
        # Do not leak the handle we opened ourselves; a file object
        # supplied by the caller stays open because we do not own it.
        if not self._filePassed:
            zef_file.close()
        raise
    return zef
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000819
def _writecheck(self, zinfo):
    """Validate 'zinfo' and the archive state before a member is written.

    A duplicate member name only produces a printed warning (and only
    when self.debug is set).  Raises RuntimeError when the archive is
    not in mode "w"/"a", is already closed, or the compression method is
    unsupported or needs the missing zlib module; raises LargeZipFile
    when the member size or header offset exceeds ZIP64_LIMIT while
    ZIP64 extensions are not allowed.
    """
    is_duplicate = zinfo.filename in self.NameToInfo
    if is_duplicate and self.debug:      # Warning for duplicate names
        print("Duplicate name:", zinfo.filename)

    if self.mode not in ("w", "a"):
        raise RuntimeError('write() requires mode "w" or "a"')
    if not self.fp:
        raise RuntimeError(
            "Attempt to write ZIP archive that was already closed")

    if zinfo.compress_type == ZIP_DEFLATED and not zlib:
        raise RuntimeError(
            "Compression requires the (missing) zlib module")
    if zinfo.compress_type not in (ZIP_STORED, ZIP_DEFLATED):
        raise RuntimeError("That compression method is not supported")

    if zinfo.file_size > ZIP64_LIMIT and not self._allowZip64:
        raise LargeZipFile("Filesize would require ZIP64 extensions")
    if zinfo.header_offset > ZIP64_LIMIT and not self._allowZip64:
        raise LargeZipFile(
            "Zipfile size would require ZIP64 extensions")
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000842
def write(self, filename, arcname=None, compress_type=None):
    """Put the bytes from filename into the archive under the name
    arcname.

    'arcname' defaults to 'filename'; any drive letter and leading path
    separators are removed from it.  'compress_type' overrides the
    archive's default compression method for this one member.

    Raises RuntimeError if the archive is already closed, plus whatever
    _writecheck() raises for an unwritable archive or member.
    """
    if not self.fp:
        raise RuntimeError(
            "Attempt to write to ZIP archive that was already closed")

    st = os.stat(filename)
    mtime = time.localtime(st.st_mtime)
    date_time = mtime[0:6]
    # Create ZipInfo instance to store file information
    if arcname is None:
        arcname = filename
    arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
    while arcname[0] in (os.sep, os.altsep):
        arcname = arcname[1:]
    zinfo = ZipInfo(arcname, date_time)
    zinfo.external_attr = (st[0] & 0xFFFF) << 16      # Unix attributes
    if compress_type is None:
        zinfo.compress_type = self.compression
    else:
        zinfo.compress_type = compress_type

    # The size from stat() is what _writecheck() compares against the
    # ZIP64 limit; it is reset below and recomputed while copying.
    zinfo.file_size = st.st_size
    zinfo.flag_bits = 0x00
    zinfo.header_offset = self.fp.tell()    # Start of header bytes

    self._writecheck(zinfo)
    self._didModify = True
    # The with-block guarantees the source file is closed even when
    # reading, compressing or writing to the archive raises.
    with io.open(filename, "rb") as fp:
        # Must overwrite CRC and sizes with correct data later
        zinfo.CRC = CRC = 0
        zinfo.compress_size = compress_size = 0
        zinfo.file_size = file_size = 0
        self.fp.write(zinfo.FileHeader())
        if zinfo.compress_type == ZIP_DEFLATED:
            cmpr = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                                    zlib.DEFLATED, -15)
        else:
            cmpr = None
        while True:
            buf = fp.read(1024 * 8)
            if not buf:
                break
            file_size = file_size + len(buf)
            CRC = binascii.crc32(buf, CRC)
            if cmpr:
                buf = cmpr.compress(buf)
                compress_size = compress_size + len(buf)
            self.fp.write(buf)
    if cmpr:
        buf = cmpr.flush()
        compress_size = compress_size + len(buf)
        self.fp.write(buf)
        zinfo.compress_size = compress_size
    else:
        zinfo.compress_size = file_size
    zinfo.CRC = CRC
    zinfo.file_size = file_size
    # Seek backwards and write CRC and file sizes
    position = self.fp.tell()       # Preserve current position in file
    self.fp.seek(zinfo.header_offset + 14, 0)
    self.fp.write(struct.pack("<lLL", zinfo.CRC, zinfo.compress_size,
                              zinfo.file_size))
    self.fp.seek(position, 0)
    self.filelist.append(zinfo)
    self.NameToInfo[zinfo.filename] = zinfo
911
def writestr(self, zinfo_or_arcname, data):
    """Write a file into the archive.  The contents is 'data', which
    may be either a 'str' or a 'bytes' instance; if it is a 'str',
    it is encoded as UTF-8 first.
    'zinfo_or_arcname' is either a ZipInfo instance or
    the name of the file in the archive.

    Raises RuntimeError if the archive is already closed, plus whatever
    _writecheck() raises for an unwritable archive or member.
    """
    # Fail fast on a closed archive, before doing any other work;
    # this matches the order used by write().
    if not self.fp:
        raise RuntimeError(
            "Attempt to write to ZIP archive that was already closed")

    if isinstance(data, str):
        data = data.encode("utf-8")
    if not isinstance(zinfo_or_arcname, ZipInfo):
        zinfo = ZipInfo(filename=zinfo_or_arcname,
                        date_time=time.localtime(time.time()))
        zinfo.compress_type = self.compression
    else:
        zinfo = zinfo_or_arcname

    zinfo.file_size = len(data)             # Uncompressed size
    # Nothing is written between here and the header below, so this
    # offset stays valid and does not need to be computed a second time.
    zinfo.header_offset = self.fp.tell()    # Start of header data
    self._writecheck(zinfo)
    self._didModify = True
    zinfo.CRC = binascii.crc32(data)        # CRC-32 checksum
    if zinfo.compress_type == ZIP_DEFLATED:
        co = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                              zlib.DEFLATED, -15)
        data = co.compress(data) + co.flush()
        zinfo.compress_size = len(data)     # Compressed size
    else:
        zinfo.compress_size = zinfo.file_size
    self.fp.write(zinfo.FileHeader())
    self.fp.write(data)
    self.fp.flush()
    if zinfo.flag_bits & 0x08:
        # Write CRC and file sizes after the file data
        self.fp.write(struct.pack("<lLL", zinfo.CRC, zinfo.compress_size,
                                  zinfo.file_size))
    self.filelist.append(zinfo)
    self.NameToInfo[zinfo.filename] = zinfo
953
def __del__(self):
    """Finalizer: run close() in case the caller never did."""
    self.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000957
def close(self):
    """Close the file, and for mode "w" and "a" write the ending
    records.

    Calling close() on an archive that is already closed does nothing.
    The underlying file is released even when writing the ending records
    raises; a file object supplied by the caller is never closed here.
    """
    if self.fp is None:
        return

    try:
        if self.mode in ("w", "a") and self._didModify:
            # write ending records
            count = 0
            pos1 = self.fp.tell()
            for zinfo in self.filelist:         # write central directory
                count = count + 1
                dt = zinfo.date_time
                dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
                dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
                extra = []
                if zinfo.file_size > ZIP64_LIMIT \
                        or zinfo.compress_size > ZIP64_LIMIT:
                    extra.append(zinfo.file_size)
                    extra.append(zinfo.compress_size)
                    file_size = 0xffffffff  # -1
                    compress_size = 0xffffffff  # -1
                else:
                    file_size = zinfo.file_size
                    compress_size = zinfo.compress_size

                if zinfo.header_offset > ZIP64_LIMIT:
                    extra.append(zinfo.header_offset)
                    header_offset = -1  # struct "l" format: 32 one bits
                else:
                    header_offset = zinfo.header_offset

                extra_data = zinfo.extra
                if extra:
                    # Append a ZIP64 field to the extra's
                    extra_data = struct.pack(
                        '<hh' + 'q'*len(extra),
                        1, 8*len(extra), *extra) + extra_data

                    extract_version = max(45, zinfo.extract_version)
                    create_version = max(45, zinfo.create_version)
                else:
                    extract_version = zinfo.extract_version
                    create_version = zinfo.create_version

                # Encode once and record the length of the *encoded*
                # name: the length field counts the bytes that follow
                # the record, which differs from the character count
                # for non-ASCII names.
                filename = zinfo.filename.encode("utf-8")
                centdir = struct.pack(structCentralDir,
                    stringCentralDir, create_version,
                    zinfo.create_system, extract_version, zinfo.reserved,
                    zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
                    zinfo.CRC, compress_size, file_size,
                    len(filename), len(extra_data), len(zinfo.comment),
                    0, zinfo.internal_attr, zinfo.external_attr,
                    header_offset)
                self.fp.write(centdir)
                self.fp.write(filename)
                self.fp.write(extra_data)
                self.fp.write(zinfo.comment)

            pos2 = self.fp.tell()
            # Write end-of-zip-archive record
            if pos1 > ZIP64_LIMIT:
                # Need to write the ZIP64 end-of-archive records
                zip64endrec = struct.pack(
                    structEndArchive64, stringEndArchive64,
                    44, 45, 45, 0, 0, count, count, pos2 - pos1, pos1)
                self.fp.write(zip64endrec)

                zip64locrec = struct.pack(
                    structEndArchive64Locator,
                    stringEndArchive64Locator, 0, pos2, 1)
                self.fp.write(zip64locrec)

                endrec = struct.pack(structEndArchive, stringEndArchive,
                                     0, 0, count, count, pos2 - pos1, -1, 0)
                self.fp.write(endrec)

            else:
                endrec = struct.pack(structEndArchive, stringEndArchive,
                                     0, 0, count, count, pos2 - pos1, pos1, 0)
                self.fp.write(endrec)
            self.fp.flush()
    finally:
        # Mark the archive closed first so a later close() (for example
        # from __del__) becomes a no-op even if fp.close() itself fails.
        fp = self.fp
        self.fp = None
        if not self._filePassed:
            fp.close()
1043
1044
class PyZipFile(ZipFile):
    """ZipFile subclass for archiving Python library files and packages."""

    def writepy(self, pathname, basename=""):
        """Add all files from "pathname" to the ZIP archive.

        Three cases are handled:

        * a package directory (one containing __init__.py): the package
          and, recursively, its sub-packages are added, with every *.py
          module stored below "basename/<package name>";
        * a plain directory: each *.py file directly inside it is added;
        * anything else: it must be a single *.py file, which is added.

        What is stored is always the compiled module (.pyo or .pyc) as
        selected by _get_codename(), compiling first if necessary.

        Raises RuntimeError when a non-directory pathname does not end
        with ".py".
        """
        leaf = os.path.split(pathname)[1]

        if not os.path.isdir(pathname):
            # A single module file.
            if pathname[-3:] != ".py":
                raise RuntimeError(
                    'Files added with writepy() must end with ".py"')
            fname, arcname = self._get_codename(pathname[0:-3], basename)
            if self.debug:
                print("Adding file", arcname)
            self.write(fname, arcname)
            return

        initname = os.path.join(pathname, "__init__.py")
        if not os.path.isfile(initname):
            # This is NOT a package directory, add its files at top level
            if self.debug:
                print("Adding files from directory", pathname)
            for entry in os.listdir(pathname):
                path = os.path.join(pathname, entry)
                ext = os.path.splitext(entry)[1]
                if ext == ".py":
                    fname, arcname = self._get_codename(path[0:-3],
                                                        basename)
                    if self.debug:
                        print("Adding", arcname)
                    self.write(fname, arcname)
            return

        # This is a package directory, add it
        basename = "%s/%s" % (basename, leaf) if basename else leaf
        if self.debug:
            print("Adding package in", pathname, "as", basename)
        fname, arcname = self._get_codename(initname[0:-3], basename)
        if self.debug:
            print("Adding", arcname)
        self.write(fname, arcname)

        entries = os.listdir(pathname)
        entries.remove("__init__.py")       # already added above
        # Add all *.py files and package subdirectories
        for entry in entries:
            path = os.path.join(pathname, entry)
            ext = os.path.splitext(entry)[1]
            if os.path.isdir(path):
                if os.path.isfile(os.path.join(path, "__init__.py")):
                    # This is a package directory, add it
                    self.writepy(path, basename)  # Recursive call
            elif ext == ".py":
                fname, arcname = self._get_codename(path[0:-3], basename)
                if self.debug:
                    print("Adding", arcname)
                self.write(fname, arcname)

    def _get_codename(self, pathname, basename):
        """Return (filename, archivename) for the path.

        'pathname' is a module path without its extension.  The .pyo
        file is chosen when it exists and is at least as new as the .py
        source; otherwise the .pyc file is chosen, and the source is
        compiled first when the .pyc is missing or older than the
        source.  A compile error is printed rather than raised.

        The archive name is the chosen file's base name, prefixed with
        "basename/" when 'basename' is non-empty.  For example, given
        /python/lib/string, return (/python/lib/string.pyc, string).
        """
        source = pathname + ".py"
        compiled = pathname + ".pyc"
        optimized = pathname + ".pyo"

        def mtime(path):
            return os.stat(path).st_mtime

        if os.path.isfile(optimized) and mtime(optimized) >= mtime(source):
            chosen = optimized      # Use .pyo file
        else:
            stale = (not os.path.isfile(compiled)
                     or mtime(compiled) < mtime(source))
            if stale:
                import py_compile
                if self.debug:
                    print("Compiling", source)
                try:
                    py_compile.compile(source, compiled, None, True)
                except py_compile.PyCompileError as err:
                    print(err.msg)
            chosen = compiled

        archivename = os.path.split(chosen)[1]
        if basename:
            archivename = "%s/%s" % (basename, archivename)
        return (chosen, archivename)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001142
1143
def main(args=None):
    """Command-line entry point: list, test, extract or create a zipfile.

    'args' is the argument list without the program name; it defaults
    to sys.argv[1:].  The first argument selects the action (-l, -t, -e
    or -c).  On an unknown action or a wrong number of arguments the
    usage text is printed and the process exits with status 1.
    """
    import textwrap
    USAGE = textwrap.dedent("""\
        Usage:
            zipfile.py -l zipfile.zip # Show listing of a zipfile
            zipfile.py -t zipfile.zip # Test if a zipfile is valid
            zipfile.py -e zipfile.zip target # Extract zipfile into target dir
            zipfile.py -c zipfile.zip src ... # Create zipfile from sources
        """)
    if args is None:
        args = sys.argv[1:]

    if not args or args[0] not in ('-l', '-c', '-e', '-t'):
        print(USAGE)
        sys.exit(1)

    if args[0] == '-l':
        if len(args) != 2:
            print(USAGE)
            sys.exit(1)
        zf = ZipFile(args[1], 'r')
        try:
            zf.printdir()
        finally:
            zf.close()

    elif args[0] == '-t':
        if len(args) != 2:
            print(USAGE)
            sys.exit(1)
        zf = ZipFile(args[1], 'r')
        try:
            badfile = zf.testzip()
        finally:
            zf.close()
        # testzip() returns the name of the first bad member (or None);
        # report it instead of silently discarding the result.
        if badfile:
            print("The following enclosed file is corrupted: %r" % badfile)
        print("Done testing")

    elif args[0] == '-e':
        if len(args) != 3:
            print(USAGE)
            sys.exit(1)

        zf = ZipFile(args[1], 'r')
        try:
            out = args[2]
            # NOTE(review): member names are joined onto 'out' without
            # sanitising, so names containing ".." or absolute paths can
            # land outside the target directory -- only extract archives
            # you trust.
            for path in zf.namelist():
                if path.startswith('./'):
                    tgt = os.path.join(out, path[2:])
                else:
                    tgt = os.path.join(out, path)

                tgtdir = os.path.dirname(tgt)
                if not os.path.exists(tgtdir):
                    os.makedirs(tgtdir)
                with io.open(tgt, 'wb') as fp:
                    fp.write(zf.read(path))
        finally:
            zf.close()

    elif args[0] == '-c':
        if len(args) < 3:
            print(USAGE)
            sys.exit(1)

        def addToZip(zf, path, zippath):
            # Add a file, or recurse into a directory; anything else
            # (neither file nor directory) is ignored.
            if os.path.isfile(path):
                zf.write(path, zippath, ZIP_DEFLATED)
            elif os.path.isdir(path):
                for nm in os.listdir(path):
                    addToZip(zf,
                             os.path.join(path, nm), os.path.join(zippath, nm))

        zf = ZipFile(args[1], 'w', allowZip64=True)
        try:
            for src in args[2:]:
                addToZip(zf, src, os.path.basename(src))
        finally:
            zf.close()
1215 zf.close()
1216
# Run the command-line interface only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()