blob: 1c15983d1e2647c13e04422e29f27bb42db1d647 [file] [log] [blame]
#! /usr/bin/env python

"""Mimification and unmimification of mail messages.

Decode quoted-printable parts of a mail message or encode using
quoted-printable.

Usage:
        mimify(input, output)
        unmimify(input, output, decode_base64 = 0)
to encode and decode respectively.  Input and output may be the name
of a file or an open file object.  Only a readline() method is used
on the input file, only a write() method is used on the output file.
When using file names, the input and output file names may be the
same.

Interactive usage:
        mimify.py [-l len] -e [infile [outfile]]
        mimify.py [-l len] -d [-b] [infile [outfile]]
to encode and decode respectively.  Infile defaults to standard
input and outfile to standard output.  The -l option sets the line
length above which a body is encoded as quoted-printable; -b, which
is only accepted together with -d, also decodes base64 parts.
"""
Sjoerd Mullendere8a0a5c1996-02-14 10:40:03 +000023
# Configure
# These module-level settings are read by the functions below; the
# command-line driver overrides MAXLEN when -l is given.
MAXLEN = 200    # if lines longer than this, encode as quoted-printable
CHARSET = 'ISO-8859-1'  # default charset for non-US-ASCII mail; written
                        # into encoded header words and charset= parameters
QUOTE = '> '            # string replies are quoted with
# End configure
29
Eric S. Raymond6b8c5282001-02-09 07:10:12 +000030import re
Sjoerd Mullendere8a0a5c1996-02-14 10:40:03 +000031
import warnings
# Emitted once, when the module is imported.  The third argument is the
# stacklevel: 2 makes the warning refer to the caller of this module's
# code rather than to this line.
warnings.warn("the mimify module is deprecated; use the email package instead",
                DeprecationWarning, 2)

# Names exported by "from mimify import *".
__all__ = ["mimify","unmimify","mime_encode_header","mime_decode_header"]
37
# Patterns used while reading headers and bodies.  All but mime_code and
# mime_head are anchored with '^' and applied with match(), and every
# pattern except he is case-insensitive.

# Content-Transfer-Encoding header announcing quoted-printable
qp = re.compile('^content-transfer-encoding:\\s*quoted-printable', re.I)
# Content-Transfer-Encoding header announcing base64
base64_re = re.compile('^content-transfer-encoding:\\s*base64', re.I)
# multipart Content-Type header; group 1 is the boundary string (without
# the surrounding quotes, if any)
mp = re.compile('^content-type:.*multipart/.*boundary="?([^;"\n]*)', re.I|re.S)
# Content-Type header with a quoted us-ascii or iso-8859-N charset;
# group 2 is the charset name, groups 1 and 3 the text around it
chrset = re.compile('^(content-type:.*charset=")(us-ascii|iso-8859-[0-9]+)(".*)', re.I|re.S)
# end of header: an empty line, or a line made up of dashes only
he = re.compile('^-*\n')
# a quoted-printable =XX escape; group 1 holds the two hex digits
mime_code = re.compile('=([0-9a-f][0-9a-f])', re.I)
# an "=?iso-8859-1?q?...?=" encoded word; group 1 is the encoded text
mime_head = re.compile('=\\?iso-8859-1\\?q\\?([^? \t\n]+)\\?=', re.I)
# Subject header of a reply ("Subject: Re: ...")
repl = re.compile('^subject:\\s+re: ', re.I)
Sjoerd Mullendere8a0a5c1996-02-14 10:40:03 +000046
class File:
    """Minimal file-like wrapper that stops at a multipart boundary.

    Only readline() is provided.  When the underlying file yields the
    boundary line (boundary + newline) or the closing boundary line
    (boundary + '--' + newline), that line is remembered in the peek
    attribute and readline() reports end of file from then on.  A false
    boundary disables the boundary check altogether.
    """

    def __init__(self, file, boundary):
        self.file = file
        self.boundary = boundary
        # the boundary line that ended this part, once one has been seen
        self.peek = None

    def readline(self):
        """Return the next line, or '' at end of file or at a boundary."""
        if self.peek is not None:
            # a boundary was hit earlier; this part is exhausted
            return ''
        text = self.file.readline()
        marker = self.boundary
        if text and marker and text in (marker + '\n', marker + '--\n'):
            # keep the boundary line so the caller can see how the part ended
            self.peek = text
            return ''
        return text
Sjoerd Mullendere8a0a5c1996-02-14 10:40:03 +000070
class HeaderFile:
    """Reader that hands out complete (unfolded) header lines.

    Continuation lines, i.e. lines starting with a space or a tab, are
    appended to the line they continue.  Detecting the end of a header
    takes one line of read-ahead, which is kept in the peek attribute
    until the next call.
    """

    def __init__(self, file):
        self.file = file
        self.peek = None

    def readline(self):
        """Return the next header line with its continuation lines."""
        pending = self.peek
        self.peek = None
        if pending is None:
            pending = self.file.readline()
        header = pending
        # End of file, or the line ending the header: hand it back
        # without reading ahead.
        if not header or he.match(header):
            return header
        while True:
            following = self.file.readline()
            self.peek = following
            if not following or following[0] not in ' \t':
                # not a continuation; it stays in peek for the next call
                return header
            header = header + following
            self.peek = None
Sjoerd Mullendere8a0a5c1996-02-14 10:40:03 +000093
def mime_decode(line):
    """Decode a single line of quoted-printable text to 8bit.

    Each =XX escape (two hex digits, upper or lower case) is replaced by
    the character with that code; the rest of the line is left alone.
    """
    def unescape(found):
        return chr(int(found.group(1), 16))

    return mime_code.sub(unescape, line)
Sjoerd Mullendere8a0a5c1996-02-14 10:40:03 +0000106
def mime_decode_header(line):
    """Decode a header line to 8bit.

    Every =?iso-8859-1?q?...?= encoded word in line is replaced by its
    decoded text; everything else is returned unchanged.
    """
    def expand(found):
        # convert underscores to spaces (before =XX conversion!)
        word = found.group(1).replace('_', ' ')
        return mime_decode(word)

    return mime_head.sub(expand, line)
Sjoerd Mullendere8a0a5c1996-02-14 10:40:03 +0000121
def unmimify_part(ifile, ofile, decode_base64 = 0):
    """Convert a quoted-printable part of a MIME mail message to 8bit.

    ifile must provide readline() and a boundary attribute (see File);
    ofile only needs write().  Header lines are copied with their encoded
    words decoded, except that a quoted-printable Content-Transfer-Encoding
    header is dropped.  The body is then copied, decoded if it was
    quoted-printable.  If decode_base64 is true, a base64
    Content-Transfer-Encoding header is dropped too and the body is
    base64-decoded line by line.  Parts of a multipart message are
    processed recursively.  Returns None.
    """
    multipart = None
    quoted_printable = 0
    is_base64 = 0
    # Compare against all of QUOTE rather than a fixed two characters, so
    # that the test stays right when QUOTE is reconfigured.
    if ifile.boundary and ifile.boundary[:len(QUOTE)] == QUOTE:
        prefix = QUOTE
    else:
        prefix = ''

    # read header
    hfile = HeaderFile(ifile)
    while 1:
        line = hfile.readline()
        if not line:
            return
        if prefix and line[:len(prefix)] == prefix:
            line = line[len(prefix):]
            pref = prefix
        else:
            pref = ''
        line = mime_decode_header(line)
        if qp.match(line):
            quoted_printable = 1
            continue        # skip this header
        if decode_base64 and base64_re.match(line):
            is_base64 = 1
            continue        # skip this header as well
        ofile.write(pref + line)
        mp_res = mp.match(line)
        if mp_res:
            # boundary lines in the body are '--' followed by the boundary
            multipart = '--' + mp_res.group(1)
        if he.match(line):
            break

    # read body
    while 1:
        line = ifile.readline()
        if not line:
            return
        # strip the wrapper from encoded words that show up in the body
        line = re.sub(mime_head, '\\1', line)
        if prefix and line[:len(prefix)] == prefix:
            line = line[len(prefix):]
            pref = prefix
        else:
            pref = ''
        while multipart:
            if line == multipart + '--\n':
                # closing boundary: no further parts follow
                ofile.write(pref + line)
                multipart = None
                line = None
                break
            if line == multipart + '\n':
                ofile.write(pref + line)
                nifile = File(ifile, multipart)
                unmimify_part(nifile, ofile, decode_base64)
                # the boundary line that ended the nested part, if any
                line = nifile.peek
                if not line:
                    # premature end of file
                    break
                continue
            # not a boundary between parts
            break
        if line and quoted_printable:
            # a trailing '=' is a soft line break: join with the next line
            while line[-2:] == '=\n':
                line = line[:-2]
                newline = ifile.readline()
                if newline[:len(QUOTE)] == QUOTE:
                    newline = newline[len(QUOTE):]
                line = line + newline
            line = mime_decode(line)
        if line and is_base64 and not pref:
            import base64
            line = base64.decodestring(line)
        if line:
            ofile.write(pref + line)
Sjoerd Mullendere8a0a5c1996-02-14 10:40:03 +0000206
def unmimify(infile, outfile, decode_base64 = 0):
    """Convert quoted-printable parts of a MIME mail message to 8bit.

    infile and outfile may each be a file name or an open file object.
    Only readline() is used on the input and only write() and flush() on
    the output.  When both are the same file name, the original file is
    first renamed to the same name with a ',' prepended and the result is
    written under the original name.  If decode_base64 is true, base64
    parts are decoded as well.

    Files opened here by name are closed before returning, also when an
    error occurs; file objects supplied by the caller are left open.
    """
    opened = []
    try:
        if isinstance(infile, str):
            ifile = open(infile)
            opened.append(ifile)
            if isinstance(outfile, str) and infile == outfile:
                # Move the input out of the way; the already open ifile
                # keeps reading from the renamed file.
                import os
                d, f = os.path.split(infile)
                os.rename(infile, os.path.join(d, ',' + f))
        else:
            ifile = infile
        if isinstance(outfile, str):
            ofile = open(outfile, 'w')
            opened.append(ofile)
        else:
            ofile = outfile
        nifile = File(ifile, None)
        unmimify_part(nifile, ofile, decode_base64)
        ofile.flush()
    finally:
        for fp in opened:
            fp.close()
Sjoerd Mullendere8a0a5c1996-02-14 10:40:03 +0000224
# Characters written as =XX escapes in a message body: '=' itself and
# everything from DEL (0177) up to 0377.
mime_char = re.compile('[=\177-\377]')   # quote these chars in body
# Inside an encoded header word '?' delimits the word and '_' stands for
# a space (mime_decode_header() turns '_' back into ' '), so both must be
# escaped as well; otherwise an underscore would come back as a space.
mime_header_char = re.compile('[=?_\177-\377]')  # quote these in header
Sjoerd Mullendere8a0a5c1996-02-14 10:40:03 +0000227
def mime_encode(line, header):
    """Code a single line as quoted-printable.

    If header is set, quote some extra characters.  Lines of 75 or more
    characters are broken up with soft line breaks ('=' followed by a
    newline); a break is never placed inside an =XX escape.
    """
    if header:
        special = mime_header_char
    else:
        special = mime_char

    def escape(found):
        return ('=%02x' % ord(found.group(0))).upper()

    if line.startswith('From '):
        # quote 'From ' at the start of a line for stupid mailers
        encoded = ('=%02x' % ord('F')).upper() + special.sub(escape, line[1:])
    else:
        encoded = special.sub(escape, line)

    pieces = []
    while len(encoded) >= 75:
        cut = 73
        # back up so that an =XX escape is not split over two lines
        while '=' in (encoded[cut], encoded[cut - 1]):
            cut -= 1
        cut += 1
        pieces.append(encoded[:cut] + '=\n')
        encoded = encoded[cut:]
    pieces.append(encoded)
    return ''.join(pieces)
Sjoerd Mullendere8a0a5c1996-02-14 10:40:03 +0000259
# A word containing at least one character in the range 0177-0377.
# Group 1 is what precedes the word (space, tab, '(' or the start of the
# line), group 2 the word itself; it must be followed by a space, a tab,
# ')' or a newline.
mime_header = re.compile('([ \t(]|^)([-a-zA-Z0-9_+]*[\177-\377][-a-zA-Z0-9_+\177-\377]*)(?=[ \t)]|\n)')

def mime_encode_header(line):
    """Code a single header line as quoted-printable.

    Each word matched by mime_header is replaced by an encoded word of
    the form =?CHARSET?Q?...?=; the rest of the line is kept as it is.
    """
    def wrap(found):
        lead, word = found.group(1), found.group(2)
        return '%s=?%s?Q?%s?=' % (lead, CHARSET, mime_encode(word, 1))

    return mime_header.sub(wrap, line)
Sjoerd Mullendere8a0a5c1996-02-14 10:40:03 +0000275
# Mime-Version header (case-insensitive)
mv = re.compile('^mime-version:', re.I)
# any Content-Transfer-Encoding header (case-insensitive)
cte = re.compile('^content-transfer-encoding:', re.I)
# a character in the range 0177-0377, i.e. one that needs quoting
iso_char = re.compile('[\177-\377]')
Sjoerd Mullendere8a0a5c1996-02-14 10:40:03 +0000279
def mimify_part(ifile, ofile, is_mime):
    """Convert an 8bit part of a MIME mail message to quoted-printable.

    ifile must provide readline(); ofile must provide write().  is_mime
    tells whether a Mime-Version header is already known to be present
    (the recursive calls for nested parts pass 1).

    The header and the body of the part are read completely before
    anything is written, because the headers that are written depend on
    what the body contains.  Parts of a multipart message are processed
    recursively.  Returns None.
    """
    has_cte = is_qp = is_base64 = 0
    multipart = None
    must_quote_body = must_quote_header = has_iso_chars = 0

    header = []
    header_end = ''
    message = []
    message_end = ''
    # read header
    hfile = HeaderFile(ifile)
    while 1:
        line = hfile.readline()
        if not line:
            break
        if not must_quote_header and iso_char.search(line):
            must_quote_header = 1
        if mv.match(line):
            is_mime = 1
        if cte.match(line):
            has_cte = 1
            if qp.match(line):
                is_qp = 1
            elif base64_re.match(line):
                is_base64 = 1
        mp_res = mp.match(line)
        if mp_res:
            # boundary lines in the body are '--' followed by the boundary
            multipart = '--' + mp_res.group(1)
        if he.match(line):
            # the line ending the header is kept apart so that extra
            # headers can be written in front of it
            header_end = line
            break
        header.append(line)

    # read body
    while 1:
        line = ifile.readline()
        if not line:
            break
        if multipart:
            # either kind of boundary line ends the text of this part
            if line == multipart + '--\n':
                message_end = line
                break
            if line == multipart + '\n':
                message_end = line
                break
        if is_base64:
            # base64 bodies are passed through untouched
            message.append(line)
            continue
        if is_qp:
            # decode first, joining lines split by a soft line break
            # (trailing '='), so the tests below see the 8bit text
            while line[-2:] == '=\n':
                line = line[:-2]
                newline = ifile.readline()
                if newline[:len(QUOTE)] == QUOTE:
                    newline = newline[len(QUOTE):]
                line = line + newline
            line = mime_decode(line)
        message.append(line)
        if not has_iso_chars:
            if iso_char.search(line):
                has_iso_chars = must_quote_body = 1
        if not must_quote_body:
            if len(line) > MAXLEN:
                must_quote_body = 1

    # convert and output header and body
    for line in header:
        if must_quote_header:
            line = mime_encode_header(line)
        chrset_res = chrset.match(line)
        if chrset_res:
            if has_iso_chars:
                # change us-ascii into iso-8859-1
                if chrset_res.group(2).lower() == 'us-ascii':
                    line = '%s%s%s' % (chrset_res.group(1),
                                       CHARSET,
                                       chrset_res.group(3))
            else:
                # change iso-8859-* into us-ascii
                line = '%sus-ascii%s' % chrset_res.group(1, 3)
        if has_cte and cte.match(line):
            # rewrite the existing header to match what will be written
            line = 'Content-Transfer-Encoding: '
            if is_base64:
                line = line + 'base64\n'
            elif must_quote_body:
                line = line + 'quoted-printable\n'
            else:
                line = line + '7bit\n'
        ofile.write(line)
    if (must_quote_header or must_quote_body) and not is_mime:
        # no Mime-Version header was seen: add the MIME headers
        ofile.write('Mime-Version: 1.0\n')
        ofile.write('Content-Type: text/plain; ')
        if has_iso_chars:
            ofile.write('charset="%s"\n' % CHARSET)
        else:
            ofile.write('charset="us-ascii"\n')
    if must_quote_body and not has_cte:
        ofile.write('Content-Transfer-Encoding: quoted-printable\n')
    ofile.write(header_end)

    for line in message:
        if must_quote_body:
            line = mime_encode(line, 0)
        ofile.write(line)
    ofile.write(message_end)

    # message_end is the boundary line that stopped the body loop, or ''
    # if the input ended first
    line = message_end
    while multipart:
        if line == multipart + '--\n':
            # read bit after the end of the last part
            while 1:
                line = ifile.readline()
                if not line:
                    return
                if must_quote_body:
                    line = mime_encode(line, 0)
                ofile.write(line)
        if line == multipart + '\n':
            nifile = File(ifile, multipart)
            mimify_part(nifile, ofile, 1)
            # the boundary line that ended the nested part, if any
            line = nifile.peek
            if not line:
                # premature end of file
                break
            ofile.write(line)
            continue
        # unexpectedly no multipart separator--copy rest of file
        while 1:
            line = ifile.readline()
            if not line:
                return
            if must_quote_body:
                line = mime_encode(line, 0)
            ofile.write(line)
Sjoerd Mullendere8a0a5c1996-02-14 10:40:03 +0000414
def mimify(infile, outfile):
    """Convert 8bit parts of a MIME mail message to quoted-printable.

    infile and outfile may each be a file name or an open file object.
    Only readline() is used on the input and only write() and flush() on
    the output.  When both are the same file name, the original file is
    first renamed to the same name with a ',' prepended and the result is
    written under the original name.

    Files opened here by name are closed before returning, also when an
    error occurs; file objects supplied by the caller are left open.
    """
    opened = []
    try:
        if isinstance(infile, str):
            ifile = open(infile)
            opened.append(ifile)
            if isinstance(outfile, str) and infile == outfile:
                # Move the input out of the way; the already open ifile
                # keeps reading from the renamed file.
                import os
                d, f = os.path.split(infile)
                os.rename(infile, os.path.join(d, ',' + f))
        else:
            ifile = infile
        if isinstance(outfile, str):
            ofile = open(outfile, 'w')
            opened.append(ofile)
        else:
            ofile = outfile
        nifile = File(ifile, None)
        mimify_part(nifile, ofile, 0)
        ofile.flush()
    finally:
        for fp in opened:
            fp.close()
Sjoerd Mullendere8a0a5c1996-02-14 10:40:03 +0000432
433import sys
if __name__ == '__main__' or (len(sys.argv) > 0 and sys.argv[0] == 'mimify'):
    # Command-line driver.  Options: -e encode, -d decode, -l len to set
    # MAXLEN, -b (with -d only) to decode base64 parts as well.
    import getopt
    usage = 'Usage: mimify [-l len] -[ed] [infile [outfile]]'

    decode_base64 = 0
    try:
        opts, args = getopt.getopt(sys.argv[1:], 'l:edb')
    except getopt.GetoptError:
        # unknown option, or -l without a value: show the usage line
        # instead of a traceback
        print(usage)
        sys.exit(1)
    if len(args) not in (0, 1, 2):
        print(usage)
        sys.exit(1)
    # exactly one of -e and -d must be given, and -b requires -d
    if (('-e', '') in opts) == (('-d', '') in opts) or \
       ((('-b', '') in opts) and (('-d', '') not in opts)):
        print(usage)
        sys.exit(1)
    for o, a in opts:
        if o == '-e':
            encode = mimify
        elif o == '-d':
            encode = unmimify
        elif o == '-l':
            try:
                # this is module level, so the global MAXLEN is rebound
                MAXLEN = int(a)
            except (ValueError, OverflowError):
                print(usage)
                sys.exit(1)
        elif o == '-b':
            decode_base64 = 1
    if len(args) == 0:
        encode_args = (sys.stdin, sys.stdout)
    elif len(args) == 1:
        encode_args = (args[0], sys.stdout)
    else:
        encode_args = (args[0], args[1])
    if decode_base64:
        # only unmimify() takes the extra argument; -b without -d was
        # rejected above
        encode_args = encode_args + (decode_base64,)
    encode(*encode_args)
Guido van Rossum68468eb2003-02-27 20:14:51 +0000468 encode(*encode_args)