blob: 1b4af283ea55458d0b4bd44d39146ec21342be55 [file] [log] [blame]
Guido van Rossumf06ee5f1996-11-27 19:52:01 +00001#! /usr/bin/env python
Sjoerd Mullendere8a0a5c1996-02-14 10:40:03 +00002
'''Mimification and unmimification of mail messages.

Decode quoted-printable parts of a mail message or encode using
quoted-printable.

Usage:
        mimify(input, output)
        unmimify(input, output, decode_base64 = 0)
to encode and decode respectively.  Input and output may be the name
of a file or an open file object.  Only a readline() method is used
on the input file; only the write() and flush() methods are used on
the output file.  When using file names, the input and output file
names may be the same.

Interactive usage:
        mimify.py -e [infile [outfile]]
        mimify.py -d [infile [outfile]]
to encode and decode respectively.  Infile defaults to standard
input and outfile to standard output.
'''
23
# Configure
# A body line longer than MAXLEN forces the part to be quoted-printable.
MAXLEN = 200
# Charset written into headers when the mail contains non-US-ASCII text.
CHARSET = 'ISO-8859-1'
# Prefix that replies put in front of quoted lines.
QUOTE = '> '
# End configure
29
Guido van Rossum31626bc1997-10-24 14:46:16 +000030import re, string
Sjoerd Mullendere8a0a5c1996-02-14 10:40:03 +000031
# Header recognisers.  All of them are anchored at the start of a
# (possibly folded) header line and ignore case.

# Content-Transfer-Encoding: quoted-printable
qp = re.compile('^content-transfer-encoding:\\s*quoted-printable',
                re.IGNORECASE)
# Content-Transfer-Encoding: base64
base64_re = re.compile('^content-transfer-encoding:\\s*base64',
                       re.IGNORECASE)
# Content-Type: multipart/...; group 1 is the boundary parameter
mp = re.compile('^content-type:.*multipart/.*boundary="?([^;"\n]*)',
                re.IGNORECASE | re.DOTALL)
# Content-Type with a quoted us-ascii / iso-8859-N charset; group 2 is
# the charset name, groups 1 and 3 are the text around it
chrset = re.compile('^(content-type:.*charset=")(us-ascii|iso-8859-[0-9]+)(".*)',
                    re.IGNORECASE | re.DOTALL)
# End of the header: an empty line or a line made of dashes only
he = re.compile('^-*\n')
# One quoted-printable escape; group 1 is the two hex digits
mime_code = re.compile('=([0-9a-f][0-9a-f])', re.IGNORECASE)
# One Q-encoded iso-8859-1 word; group 1 is the encoded text
mime_head = re.compile('=\\?iso-8859-1\\?q\\?([^? \t\n]+)\\?=',
                       re.IGNORECASE)
# Subject line of a reply
repl = re.compile('^subject:\\s+re: ', re.IGNORECASE)
Sjoerd Mullendere8a0a5c1996-02-14 10:40:03 +000040
class File:
    '''Fake file object wrapping another file and an optional boundary.

    readline() is the only supported method.  It passes lines through
    from the wrapped file until it reads a line equal to the boundary
    followed by a newline, or the boundary followed by "--" and a
    newline.  That line is stored in the peek attribute and readline()
    returns '' from then on.'''

    def __init__(self, file, boundary):
        self.file = file
        self.boundary = boundary
        self.peek = None

    def readline(self):
        # A boundary was already seen: keep reporting end of file.
        if self.peek is not None:
            return ''
        line = self.file.readline()
        if line and self.boundary:
            delimiters = (self.boundary + '\n', self.boundary + '--\n')
            if line in delimiters:
                self.peek = line
                return ''
        return line
65
class HeaderFile:
    '''Fake file object whose readline() returns whole header lines.

    Continuation lines (lines starting with a space or a tab) are
    appended to the header line they belong to.  The first line that is
    not a continuation is kept in the peek attribute and handed out by
    the next call.'''

    def __init__(self, file):
        self.file = file
        self.peek = None

    def readline(self):
        if self.peek is None:
            line = self.file.readline()
        else:
            # use the line read ahead by the previous call
            line, self.peek = self.peek, None
        # End of file and the end-of-header line are returned as they
        # are, without reading ahead.
        if not line or he.match(line):
            return line
        while 1:
            ahead = self.file.readline()
            self.peek = ahead
            if ahead[:1] not in (' ', '\t'):
                # end of file or the start of the next header line
                return line
            line = line + ahead
            self.peek = None
88
def mime_decode(line):
    '''Decode a single line of quoted-printable text to 8bit.

    Every "=XX" escape (XX being two hex digits, in either case) is
    replaced by the character with that code; all other text is
    returned unchanged.'''
    def unquote(res):
        return chr(int(res.group(1), 16))
    return mime_code.sub(unquote, line)
100
def mime_decode_header(line):
    '''Decode a header line to 8bit.

    Each encoded word matched by mime_head is replaced by its decoded
    text; the rest of the line is copied unchanged.'''
    pieces = []
    rest = line
    while 1:
        hit = mime_head.search(rest)
        if hit is None:
            break
        # underscores stand for spaces; convert them before the =XX
        # escapes are decoded
        word = hit.group(1).replace('_', ' ')
        pieces.append(rest[:hit.start(0)])
        pieces.append(mime_decode(word))
        rest = rest[hit.end(0):]
    pieces.append(rest)
    return ''.join(pieces)
114
def unmimify_part(ifile, ofile, decode_base64 = 0):
    '''Convert a quoted-printable part of a MIME mail message to 8bit.

    ifile must be a File instance (its boundary attribute is read);
    ofile only needs a write() method.  The header is copied with
    encoded words decoded and the quoted-printable
    Content-Transfer-Encoding line dropped.  The body is then copied,
    decoding quoted-printable text and recursing into the parts of a
    multipart message.  If decode_base64 is true, a base64
    Content-Transfer-Encoding line is dropped as well and unquoted
    body lines are base64-decoded.'''
    multipart = None
    quoted_printable = 0
    is_base64 = 0
    # When the enclosing boundary itself starts with the reply quote
    # prefix, the lines of this part are expected to carry it too.
    if ifile.boundary and ifile.boundary[:len(QUOTE)] == QUOTE:
        prefix = QUOTE
    else:
        prefix = ''

    # read header
    hfile = HeaderFile(ifile)
    while 1:
        line = hfile.readline()
        if not line:
            return
        if prefix and line[:len(prefix)] == prefix:
            line = line[len(prefix):]
            pref = prefix
        else:
            pref = ''
        line = mime_decode_header(line)
        if qp.match(line):
            quoted_printable = 1
            continue                    # skip this header
        if decode_base64 and base64_re.match(line):
            is_base64 = 1
            continue                    # skip this header too
        ofile.write(pref + line)
        mp_res = mp.match(line)
        if mp_res:
            multipart = '--' + mp_res.group(1)
        if he.match(line):
            break

    # read body
    while 1:
        line = ifile.readline()
        if not line:
            return
        # strip the =?iso-8859-1?q?...?= wrapper from encoded words
        line = mime_head.sub('\\1', line)
        if prefix and line[:len(prefix)] == prefix:
            line = line[len(prefix):]
            pref = prefix
        else:
            pref = ''
        while multipart:
            if line == multipart + '--\n':
                # closing delimiter: no more parts follow
                ofile.write(pref + line)
                multipart = None
                line = None
                break
            if line == multipart + '\n':
                ofile.write(pref + line)
                nifile = File(ifile, multipart)
                unmimify_part(nifile, ofile, decode_base64)
                # the delimiter that ended the sub-part, if any
                line = nifile.peek
                if not line:
                    # premature end of file
                    break
                continue
            # not a boundary between parts
            break
        if line and quoted_printable:
            # join soft line breaks before decoding
            while line[-2:] == '=\n':
                line = line[:-2]
                newline = ifile.readline()
                if newline[:len(QUOTE)] == QUOTE:
                    newline = newline[len(QUOTE):]
                line = line + newline
            line = mime_decode(line)
        if line and is_base64 and not pref:
            import base64
            line = base64.decodestring(line)
        if line:
            ofile.write(pref + line)
196
def unmimify(infile, outfile, decode_base64 = 0):
    '''Convert quoted-printable parts of a MIME mail message to 8bit.

    infile and outfile are either file names or open file objects.
    When both are the same file name, the original file is renamed by
    putting a comma in front of its base name before the output file is
    created.  Files opened here are closed before returning; file
    objects passed in by the caller are only flushed.'''
    opened = []                         # files this call must close
    try:
        if isinstance(infile, type('')):
            ifile = open(infile)
            opened.append(ifile)
            if isinstance(outfile, type('')) and infile == outfile:
                import os
                d, f = os.path.split(infile)
                os.rename(infile, os.path.join(d, ',' + f))
        else:
            ifile = infile
        if isinstance(outfile, type('')):
            ofile = open(outfile, 'w')
            opened.append(ofile)
        else:
            ofile = outfile
        nifile = File(ifile, None)
        unmimify_part(nifile, ofile, decode_base64)
        ofile.flush()
    finally:
        for fp in opened:
            fp.close()
214
mime_char = re.compile('[=\240-\377]')          # quote these chars in body
mime_header_char = re.compile('[=?\240-\377]')  # quote these in header

def _mime_quote(res):
    '''Return the "=XX" escape (upper-case hex) for the matched character.'''
    return '=%02X' % ord(res.group(0))

def mime_encode(line, header):
    '''Code a single line as quoted-printable.

    line is the text to encode.  If header is true, "?" is quoted in
    addition to "=" and the characters \\240-\\377.  A leading "From "
    has its "F" quoted as well.  Encoded text of 75 characters or more
    is split with soft line breaks ("=" followed by a newline), never
    in the middle of an "=XX" escape.  Returns the encoded string.'''
    if header:
        reg = mime_header_char
    else:
        reg = mime_char
    lead = ''
    if line[:5] == 'From ':
        # quote 'From ' at the start of a line for stupid mailers
        lead = '=%02X' % ord('F')
        line = line[1:]
    line = lead + reg.sub(_mime_quote, line)

    chunks = []
    while len(line) >= 75:
        i = 73
        # back up so that the break does not fall inside an escape
        while line[i] == '=' or line[i-1] == '=':
            i = i - 1
        i = i + 1
        chunks.append(line[:i] + '=\n')
        line = line[i:]
    chunks.append(line)
    return ''.join(chunks)
248
# A word containing at least one character in the range \240-\377.
# Group 1 is the delimiter (or start of line) before the word, group 2
# the word itself and group 3 the delimiter after it.
mime_header = re.compile('([ \t(]|^)([-a-zA-Z0-9_+]*[\240-\377][-a-zA-Z0-9_+\240-\377]*)([ \t)]|\n)')

def mime_encode_header(line):
    '''Code a single header line as quoted-printable.

    Every word matched by mime_header is replaced by an encoded word
    of the form =?CHARSET?Q?...?=; the delimiters around the word and
    all other text are kept.'''
    pieces = []
    rest = line
    hit = mime_header.search(rest)
    while hit is not None:
        before, word, after = hit.group(1, 2, 3)
        pieces.append(rest[:hit.start(0)])
        pieces.append(before + '=?' + CHARSET + '?Q?' +
                      mime_encode(word, 1) + '?=' + after)
        # Search again from just behind the match, so that '^' in the
        # pattern can match right after the delimiter just consumed.
        rest = rest[hit.end(0):]
        hit = mime_header.search(rest)
    pieces.append(rest)
    return ''.join(pieces)
264
# Mime-Version: header line
mv = re.compile('^mime-version:', re.IGNORECASE)
# Content-Transfer-Encoding: header line, whatever the encoding
cte = re.compile('^content-transfer-encoding:', re.IGNORECASE)
# any character in the range \240-\377
iso_char = re.compile('[\240-\377]')
Sjoerd Mullendere8a0a5c1996-02-14 10:40:03 +0000268
def _copy_remainder(ifile, ofile, must_quote_body):
    '''Copy all remaining lines of ifile to ofile, quoting them if asked.'''
    while 1:
        line = ifile.readline()
        if not line:
            return
        if must_quote_body:
            line = mime_encode(line, 0)
        ofile.write(line)

def mimify_part(ifile, ofile, is_mime):
    '''Convert an 8bit part of a MIME mail message to quoted-printable.

    ifile only needs a readline() method and ofile a write() method.
    is_mime is true when a Mime-Version header has already been seen
    for the enclosing message.  The header and body of the part are
    read completely, written out again with the header lines adjusted
    and the body quoted if necessary, and then any further parts of a
    multipart message are handled recursively.'''
    has_cte = is_qp = is_base64 = 0
    multipart = None
    must_quote_body = must_quote_header = has_iso_chars = 0

    header = []
    header_end = ''
    message = []
    message_end = ''
    # read header
    hfile = HeaderFile(ifile)
    while 1:
        line = hfile.readline()
        if not line:
            break
        if not must_quote_header and iso_char.search(line):
            must_quote_header = 1
        if mv.match(line):
            is_mime = 1
        if cte.match(line):
            has_cte = 1
            if qp.match(line):
                is_qp = 1
            elif base64_re.match(line):
                is_base64 = 1
        mp_res = mp.match(line)
        if mp_res:
            multipart = '--' + mp_res.group(1)
        if he.match(line):
            header_end = line
            break
        header.append(line)

    # read body
    while 1:
        line = ifile.readline()
        if not line:
            break
        if multipart:
            if line == multipart + '--\n':
                message_end = line
                break
            if line == multipart + '\n':
                message_end = line
                break
        if is_base64:
            # base64 bodies are passed through untouched
            message.append(line)
            continue
        if is_qp:
            # join soft line breaks, then decode, so that the body is
            # re-encoded from its 8bit form below
            while line[-2:] == '=\n':
                line = line[:-2]
                newline = ifile.readline()
                if newline[:len(QUOTE)] == QUOTE:
                    newline = newline[len(QUOTE):]
                line = line + newline
            line = mime_decode(line)
        message.append(line)
        if not has_iso_chars:
            if iso_char.search(line):
                has_iso_chars = must_quote_body = 1
        if not must_quote_body:
            if len(line) > MAXLEN:
                must_quote_body = 1

    # convert and output header and body
    for line in header:
        if must_quote_header:
            line = mime_encode_header(line)
        chrset_res = chrset.match(line)
        if chrset_res:
            if has_iso_chars:
                # change us-ascii into iso-8859-1
                if chrset_res.group(2).lower() == 'us-ascii':
                    line = chrset_res.group(1) + \
                           CHARSET + chrset_res.group(3)
            else:
                # change iso-8859-* into us-ascii
                line = chrset_res.group(1) + 'us-ascii' + chrset_res.group(3)
        if has_cte and cte.match(line):
            line = 'Content-Transfer-Encoding: '
            if is_base64:
                line = line + 'base64\n'
            elif must_quote_body:
                line = line + 'quoted-printable\n'
            else:
                line = line + '7bit\n'
        ofile.write(line)
    if (must_quote_header or must_quote_body) and not is_mime:
        ofile.write('Mime-Version: 1.0\n')
        ofile.write('Content-Type: text/plain; ')
        if has_iso_chars:
            ofile.write('charset="%s"\n' % CHARSET)
        else:
            ofile.write('charset="us-ascii"\n')
    if must_quote_body and not has_cte:
        ofile.write('Content-Transfer-Encoding: quoted-printable\n')
    ofile.write(header_end)

    for line in message:
        if must_quote_body:
            line = mime_encode(line, 0)
        ofile.write(line)
    ofile.write(message_end)

    # handle the remaining parts of a multipart message
    line = message_end
    while multipart:
        if line == multipart + '--\n':
            # read bit after the end of the last part
            _copy_remainder(ifile, ofile, must_quote_body)
            return
        if line == multipart + '\n':
            nifile = File(ifile, multipart)
            mimify_part(nifile, ofile, 1)
            # the delimiter that ended the sub-part, if any
            line = nifile.peek
            if not line:
                # premature end of file
                break
            ofile.write(line)
            continue
        # unexpectedly no multipart separator -- copy rest of file
        # instead of looping forever on the same line
        _copy_remainder(ifile, ofile, must_quote_body)
        return
391
def mimify(infile, outfile):
    '''Convert 8bit parts of a MIME mail message to quoted-printable.

    infile and outfile are either file names or open file objects.
    When both are the same file name, the original file is renamed by
    putting a comma in front of its base name before the output file is
    created.  Files opened here are closed before returning; file
    objects passed in by the caller are only flushed.'''
    opened = []                         # files this call must close
    try:
        if isinstance(infile, type('')):
            ifile = open(infile)
            opened.append(ifile)
            if isinstance(outfile, type('')) and infile == outfile:
                import os
                d, f = os.path.split(infile)
                os.rename(infile, os.path.join(d, ',' + f))
        else:
            ifile = infile
        if isinstance(outfile, type('')):
            ofile = open(outfile, 'w')
            opened.append(ofile)
        else:
            ofile = outfile
        nifile = File(ifile, None)
        mimify_part(nifile, ofile, 0)
        ofile.flush()
    finally:
        for fp in opened:
            fp.close()
409
import sys
# Command-line driver: runs when the file is executed as a script or
# when the program was started under the name 'mimify'.
if __name__ == '__main__' or (len(sys.argv) > 0 and sys.argv[0] == 'mimify'):
    import getopt
    usage = 'Usage: mimify [-l len] -[ed] [infile [outfile]]'

    def _usage_exit():
        '''Print the usage message and exit with status 1.'''
        sys.stdout.write(usage + '\n')
        sys.exit(1)

    decode_base64 = 0
    try:
        opts, args = getopt.getopt(sys.argv[1:], 'l:edb')
    except getopt.error:
        # unknown option or missing option argument
        _usage_exit()
    if len(args) not in (0, 1, 2):
        _usage_exit()
    # exactly one of -e and -d is required; -b is only valid with -d
    if (('-e', '') in opts) == (('-d', '') in opts) or \
       ((('-b', '') in opts) and (('-d', '') not in opts)):
        _usage_exit()
    for o, a in opts:
        if o == '-e':
            encode = mimify
        elif o == '-d':
            encode = unmimify
        elif o == '-l':
            try:
                MAXLEN = int(a)
            except ValueError:
                _usage_exit()
        elif o == '-b':
            decode_base64 = 1
    if len(args) == 0:
        encode_args = (sys.stdin, sys.stdout)
    elif len(args) == 1:
        encode_args = (args[0], sys.stdout)
    else:
        encode_args = (args[0], args[1])
    if decode_base64:
        encode_args = encode_args + (decode_base64,)
    encode(*encode_args)
Guido van Rossum31626bc1997-10-24 14:46:16 +0000446