blob: cb967cd94bf70cc4453b35a2bf38be7911b9097e [file] [log] [blame]
#! /usr/bin/env python

'''Mimification and unmimification of mail messages.

Decode the quoted-printable parts of a mail message to 8bit, or encode
8bit parts using quoted-printable.

Usage:
        mimify(input, output)
        unmimify(input, output, decode_base64 = 0)
to encode and decode respectively.  Input and output may each be the
name of a file or an open file object.  Only the readline() method of
the input file and the write() method of the output file are used.
When file names are given, the input and output names may be the same.

Interactive usage:
        mimify.py [-l len] -e [infile [outfile]]
        mimify.py [-l len] -d [-b] [infile [outfile]]
to encode and decode respectively.  Infile defaults to standard input
and outfile to standard output.  -l sets MAXLEN; -b (only valid with
-d) also decodes base64 parts.
'''

# Configure
MAXLEN = 200            # body lines longer than this force quoted-printable
CHARSET = 'ISO-8859-1'  # default charset for non-US-ASCII mail
QUOTE = '> '            # string that replies are quoted with
# End configure

import re
import string

# "Content-Transfer-Encoding: quoted-printable" header
qp = re.compile('^content-transfer-encoding:\\s*quoted-printable',
                re.IGNORECASE)
# "Content-Transfer-Encoding: base64" header
base64_re = re.compile('^content-transfer-encoding:\\s*base64',
                       re.IGNORECASE)
# multipart Content-Type header; group 1 is the boundary parameter
mp = re.compile('^content-type:.*multipart/.*boundary="?([^;"\n]*)',
                re.IGNORECASE | re.DOTALL)
# Content-Type header with a quoted charset; groups are
# (text up to and including the opening quote, charset, the rest)
chrset = re.compile('^(content-type:.*charset=")(us-ascii|iso-8859-[0-9]+)(".*)',
                    re.IGNORECASE | re.DOTALL)
# end of header: an empty line or a line consisting only of dashes
he = re.compile('^-*\n')
# one quoted-printable escape; group 1 is the two hex digits
mime_code = re.compile('=([0-9a-f][0-9a-f])', re.IGNORECASE)
# an ISO-8859-1 "Q" encoded word; group 1 is the encoded text
mime_head = re.compile('=\\?iso-8859-1\\?q\\?([^? \t\n]+)\\?=', re.IGNORECASE)
# Subject header of a reply message
repl = re.compile('^subject:\\s+re: ', re.IGNORECASE)
Sjoerd Mullendere8a0a5c1996-02-14 10:40:03 +000040
class File:
    '''Fake file object that ends at a multipart boundary.

    Wraps an object with a readline() method.  When a line equal to
    the boundary (optionally followed by "--") is read, that line is
    stored in the peek attribute instead of being returned, and from
    then on readline() reports end of file.  With a false boundary
    every line is passed through unchanged.
    The only supported method is readline().'''

    def __init__(self, file, boundary):
        self.file = file            # underlying object providing readline()
        self.boundary = boundary    # delimiter without trailing newline, or None
        self.peek = None            # the boundary line once it has been read

    def readline(self):
        '''Return the next line, or '' at end of file or at a boundary.'''
        # A boundary was already seen: this part stays exhausted.
        if self.peek is not None:
            return ''
        text = self.file.readline()
        if not text or not self.boundary:
            return text
        if text in (self.boundary + '\n', self.boundary + '--\n'):
            # Keep the delimiter so the caller can tell which kind it was.
            self.peek = text
            return ''
        return text
65
class HeaderFile:
    '''Fake file object that returns complete (unfolded) header lines.

    Continuation lines, i.e. lines starting with a space or a tab, are
    appended to the header line they belong to, newlines included.
    The only supported method is readline().'''

    def __init__(self, file):
        self.file = file    # underlying object providing readline()
        self.peek = None    # line read ahead that starts the next header

    def readline(self):
        '''Return the next header with its continuation lines attached.'''
        if self.peek is None:
            header = self.file.readline()
        else:
            # Use the line that was read ahead by the previous call.
            header, self.peek = self.peek, None
        # End of file and the end-of-header line are returned as they are.
        if not header or he.match(header):
            return header
        while 1:
            following = self.file.readline()
            self.peek = following
            if not following or following[0] not in ' \t':
                # Not a continuation: keep it in peek for the next call.
                return header
            header = header + following
            self.peek = None
88
def mime_decode(line):
    '''Decode a single line of quoted-printable text to 8bit.

    Every "=XX" escape (XX being two hexadecimal digits, in either
    case) is replaced by the character with that code.  Any other text,
    including an "=" that is not followed by two hex digits, is copied
    unchanged.  Soft line breaks are not handled here; callers join
    continued lines before calling this function.'''
    def _unquote(match):
        return chr(int(match.group(1), 16))
    return mime_code.sub(_unquote, line)
Sjoerd Mullendere8a0a5c1996-02-14 10:40:03 +0000101
def mime_decode_header(line):
    '''Decode a header line to 8bit.

    Each "=?iso-8859-1?q?...?=" encoded word in line is replaced by its
    decoded text; everything else is copied unchanged.'''
    def _decode_word(match):
        # Convert underscores to spaces before the =XX conversion, so
        # that an encoded underscore ("=5F") survives as an underscore.
        return mime_decode(match.group(1).replace('_', ' '))
    return mime_head.sub(_decode_word, line)
Sjoerd Mullendere8a0a5c1996-02-14 10:40:03 +0000116
def unmimify_part(ifile, ofile, decode_base64 = 0):
    '''Convert a quoted-printable part of a MIME mail message to 8bit.

    ifile must provide readline() and a boundary attribute (see File);
    ofile must provide write().  The header of the part is read first:
    encoded words are decoded, a quoted-printable
    Content-Transfer-Encoding header is dropped (as is a base64 one
    when decode_base64 is true) and all other headers are copied.  The
    body is then copied, decoding quoted-printable or base64 lines as
    announced by the header and recursing into the sub-parts of a
    multipart message.  Returns None.'''
    multipart = None
    quoted_printable = 0
    is_base64 = 0
    # Use len(QUOTE) rather than a fixed width so that this keeps
    # working when QUOTE is configured to a string of another length.
    if ifile.boundary and ifile.boundary[:len(QUOTE)] == QUOTE:
        prefix = QUOTE
    else:
        prefix = ''

    # read header
    hfile = HeaderFile(ifile)
    while 1:
        line = hfile.readline()
        if not line:
            return
        if prefix and line[:len(prefix)] == prefix:
            line = line[len(prefix):]
            pref = prefix
        else:
            pref = ''
        line = mime_decode_header(line)
        if qp.match(line):
            quoted_printable = 1
            continue        # skip this header
        if decode_base64 and base64_re.match(line):
            is_base64 = 1
            continue        # skip this header as well
        ofile.write(pref + line)
        mp_res = mp.match(line)
        if mp_res:
            multipart = '--' + mp_res.group(1)
        if he.match(line):
            break

    # read body
    while 1:
        line = ifile.readline()
        if not line:
            return
        # Strip the wrapper of encoded words found in the body; the
        # enclosed text is kept as it is (it is only decoded below if
        # the part itself is quoted-printable).
        line = mime_head.sub('\\1', line)
        if prefix and line[:len(prefix)] == prefix:
            line = line[len(prefix):]
            pref = prefix
        else:
            pref = ''
        while multipart:
            if line == multipart + '--\n':
                # closing delimiter: what follows is no longer multipart
                ofile.write(pref + line)
                multipart = None
                line = None
                break
            if line == multipart + '\n':
                # start of a sub-part: process it recursively, then look
                # at the delimiter on which the sub-part ended
                ofile.write(pref + line)
                nifile = File(ifile, multipart)
                unmimify_part(nifile, ofile, decode_base64)
                line = nifile.peek
                if not line:
                    # premature end of file
                    break
                continue
            # not a boundary between parts
            break
        if line and quoted_printable:
            # join soft line breaks ("=" at the end of the line)
            while line[-2:] == '=\n':
                line = line[:-2]
                newline = ifile.readline()
                if newline[:len(QUOTE)] == QUOTE:
                    newline = newline[len(QUOTE):]
                line = line + newline
            line = mime_decode(line)
        if line and is_base64 and not pref:
            # imported lazily: only needed when a base64 part is decoded
            import base64
            line = base64.decodestring(line)
        if line:
            ofile.write(pref + line)
201
def unmimify(infile, outfile, decode_base64 = 0):
    '''Convert quoted-printable parts of a MIME mail message to 8bit.

    infile and outfile may each be a file name or an open file object.
    If both are the same file name, the input file is first renamed by
    putting a comma in front of its base name and the result is written
    under the original name.  If decode_base64 is true, base64 parts
    are decoded as well.  Files opened here are closed again before
    returning; file objects passed in by the caller are only flushed.'''
    opened = []         # files opened by this function
    try:
        if isinstance(infile, str):
            ifile = open(infile)
            opened.append(ifile)
            if isinstance(outfile, str) and infile == outfile:
                import os
                d, f = os.path.split(infile)
                os.rename(infile, os.path.join(d, ',' + f))
        else:
            ifile = infile
        if isinstance(outfile, str):
            ofile = open(outfile, 'w')
            opened.append(ofile)
        else:
            ofile = outfile
        nifile = File(ifile, None)
        unmimify_part(nifile, ofile, decode_base64)
        ofile.flush()
    finally:
        for handle in opened:
            handle.close()
219
mime_char = re.compile('[=\177-\377]')  # quote these chars in body
mime_header_char = re.compile('[=?\177-\377]')  # quote these in header

def mime_encode(line, header):
    '''Code a single line as quoted-printable.

    "=" and the characters \\177-\\377 are replaced by "=XX" with
    upper-case hex digits; if header is true, "?" is quoted as well.
    A line starting with "From " gets its "F" quoted.  The result is
    broken with soft line breaks ("=" followed by a newline) as long
    as 75 or more characters remain, never inside an "=XX" escape.
    Returns the encoded string.'''
    if header:
        reg = mime_header_char
    else:
        reg = mime_char

    def _quote(match):
        return '=%02X' % ord(match.group(0))

    if line.startswith('From '):
        # quote 'From ' at the start of a line for stupid mailers
        line = '=%02X' % ord('F') + reg.sub(_quote, line[1:])
    else:
        line = reg.sub(_quote, line)

    pieces = []
    while len(line) >= 75:
        i = 73
        # back up so that an "=XX" escape is not cut in two
        while line[i] == '=' or line[i-1] == '=':
            i = i - 1
        i = i + 1
        pieces.append(line[:i] + '=\n')
        line = line[i:]
    pieces.append(line)
    return ''.join(pieces)
254
# A word containing at least one 8bit character.  Group 1 is the
# delimiter in front of the word (or empty at the start of the string),
# group 2 the word itself.  The delimiter after the word is only looked
# at, not consumed, so that it can also serve as the leading delimiter
# of the next word; otherwise the second of two adjacent 8bit words
# would be left unencoded.
mime_header = re.compile('([ \t(]|^)([-a-zA-Z0-9_+]*[\177-\377][-a-zA-Z0-9_+\177-\377]*)(?=[ \t)]|\n)')

def mime_encode_header(line):
    '''Code a single header line as quoted-printable.

    Every word of line that contains a character in the range
    \\177-\\377 is replaced by an encoded word of the form
    "=?CHARSET?Q?...?=".  All other text is copied unchanged.
    Returns the resulting string.'''
    def _encode_word(match):
        return '%s=?%s?Q?%s?=' % (match.group(1), CHARSET,
                                  mime_encode(match.group(2), 1))
    return mime_header.sub(_encode_word, line)
Sjoerd Mullendere8a0a5c1996-02-14 10:40:03 +0000270
mv = re.compile('^mime-version:', re.I)
cte = re.compile('^content-transfer-encoding:', re.I)
iso_char = re.compile('[\177-\377]')

def _copy_rest(ifile, ofile, quote):
    '''Copy the remaining lines of ifile to ofile, coding each line as
    quoted-printable first if quote is true.'''
    while 1:
        line = ifile.readline()
        if not line:
            return
        if quote:
            line = mime_encode(line, 0)
        ofile.write(line)

def mimify_part(ifile, ofile, is_mime):
    '''Convert an 8bit part of a MIME mail message to quoted-printable.

    ifile must provide readline(), ofile must provide write().  is_mime
    tells whether the message is already known to be a MIME message; a
    Mime-Version header in the part sets it as well.

    The header and the body of the part (up to the first multipart
    delimiter, if any) are read completely before anything is written,
    because the header that is written depends on the body: the body is
    quoted if it contains a character in the range \\177-\\377 or a line
    longer than MAXLEN.  A quoted-printable body is decoded first and
    encoded again only if needed; a base64 body is copied unchanged.
    Sub-parts of a multipart message are handled recursively.
    Returns None.'''
    has_cte = is_qp = is_base64 = 0
    multipart = None
    must_quote_body = must_quote_header = has_iso_chars = 0

    header = []
    header_end = ''
    message = []
    message_end = ''
    # read header
    hfile = HeaderFile(ifile)
    while 1:
        line = hfile.readline()
        if not line:
            break
        if not must_quote_header and iso_char.search(line):
            must_quote_header = 1
        if mv.match(line):
            is_mime = 1
        if cte.match(line):
            has_cte = 1
            if qp.match(line):
                is_qp = 1
            elif base64_re.match(line):
                is_base64 = 1
        mp_res = mp.match(line)
        if mp_res:
            multipart = '--' + mp_res.group(1)
        if he.match(line):
            header_end = line
            break
        header.append(line)

    # read body
    while 1:
        line = ifile.readline()
        if not line:
            break
        if multipart and line in (multipart + '--\n', multipart + '\n'):
            # a delimiter ends the text in front of the first sub-part
            message_end = line
            break
        if is_base64:
            message.append(line)
            continue
        if is_qp:
            # join soft line breaks, then decode
            while line[-2:] == '=\n':
                line = line[:-2]
                newline = ifile.readline()
                if newline[:len(QUOTE)] == QUOTE:
                    newline = newline[len(QUOTE):]
                line = line + newline
            line = mime_decode(line)
        message.append(line)
        if not has_iso_chars:
            if iso_char.search(line):
                has_iso_chars = must_quote_body = 1
        if not must_quote_body:
            if len(line) > MAXLEN:
                must_quote_body = 1

    # convert and output header and body
    for line in header:
        if must_quote_header:
            line = mime_encode_header(line)
        chrset_res = chrset.match(line)
        if chrset_res:
            if has_iso_chars:
                # change us-ascii into iso-8859-1
                if chrset_res.group(2).lower() == 'us-ascii':
                    line = '%s%s%s' % (chrset_res.group(1),
                                       CHARSET,
                                       chrset_res.group(3))
            else:
                # change iso-8859-* into us-ascii
                line = '%sus-ascii%s' % chrset_res.group(1, 3)
        if has_cte and cte.match(line):
            # rewrite the header to describe the body as it is written
            line = 'Content-Transfer-Encoding: '
            if is_base64:
                line = line + 'base64\n'
            elif must_quote_body:
                line = line + 'quoted-printable\n'
            else:
                line = line + '7bit\n'
        ofile.write(line)
    if (must_quote_header or must_quote_body) and not is_mime:
        ofile.write('Mime-Version: 1.0\n')
        ofile.write('Content-Type: text/plain; ')
        if has_iso_chars:
            ofile.write('charset="%s"\n' % CHARSET)
        else:
            ofile.write('charset="us-ascii"\n')
    if must_quote_body and not has_cte:
        ofile.write('Content-Transfer-Encoding: quoted-printable\n')
    ofile.write(header_end)

    for line in message:
        if must_quote_body:
            line = mime_encode(line, 0)
        ofile.write(line)
    ofile.write(message_end)

    line = message_end
    while multipart:
        if line != multipart + '\n':
            # Either the closing delimiter was seen (copy the bit after
            # the end of the last part) or, unexpectedly, no multipart
            # separator at all (copy the rest of the file).
            _copy_rest(ifile, ofile, must_quote_body)
            return
        nifile = File(ifile, multipart)
        mimify_part(nifile, ofile, 1)
        line = nifile.peek
        if not line:
            # premature end of file
            break
        ofile.write(line)
Sjoerd Mullendere8a0a5c1996-02-14 10:40:03 +0000409
def mimify(infile, outfile):
    '''Convert 8bit parts of a MIME mail message to quoted-printable.

    infile and outfile may each be a file name or an open file object.
    If both are the same file name, the input file is first renamed by
    putting a comma in front of its base name and the result is written
    under the original name.  Files opened here are closed again before
    returning; file objects passed in by the caller are only flushed.'''
    opened = []         # files opened by this function
    try:
        if isinstance(infile, str):
            ifile = open(infile)
            opened.append(ifile)
            if isinstance(outfile, str) and infile == outfile:
                import os
                d, f = os.path.split(infile)
                os.rename(infile, os.path.join(d, ',' + f))
        else:
            ifile = infile
        if isinstance(outfile, str):
            ofile = open(outfile, 'w')
            opened.append(ofile)
        else:
            ofile = outfile
        nifile = File(ifile, None)
        mimify_part(nifile, ofile, 0)
        ofile.flush()
    finally:
        for handle in opened:
            handle.close()
426 ofile.flush()
427
import sys
# Command line interface: exactly one of -e (encode) and -d (decode) is
# required, -b (decode base64 as well) is only accepted together with
# -d, and -l sets MAXLEN.
if __name__ == '__main__' or (len(sys.argv) > 0 and sys.argv[0] == 'mimify'):
    import getopt
    usage = 'Usage: mimify [-l len] -[ed] [infile [outfile]]'

    decode_base64 = 0
    try:
        opts, args = getopt.getopt(sys.argv[1:], 'l:edb')
    except getopt.error:
        # unknown option or missing option argument
        print(usage)
        sys.exit(1)
    if len(args) not in (0, 1, 2):
        print(usage)
        sys.exit(1)
    if (('-e', '') in opts) == (('-d', '') in opts) or \
       ((('-b', '') in opts) and (('-d', '') not in opts)):
        print(usage)
        sys.exit(1)
    for o, a in opts:
        if o == '-e':
            encode = mimify
        elif o == '-d':
            encode = unmimify
        elif o == '-l':
            try:
                MAXLEN = int(a)
            except ValueError:
                print(usage)
                sys.exit(1)
        elif o == '-b':
            decode_base64 = 1
    if len(args) == 0:
        encode_args = (sys.stdin, sys.stdout)
    elif len(args) == 1:
        encode_args = (args[0], sys.stdout)
    else:
        encode_args = (args[0], args[1])
    if decode_base64:
        # only unmimify() takes this extra argument; -b requires -d
        encode_args = encode_args + (decode_base64,)
    encode(*encode_args)
Guido van Rossum31626bc1997-10-24 14:46:16 +0000464