blob: b6f61439d758d4f7d375a9649f6bf3624b886a7a [file] [log] [blame]
Guido van Rossumf06ee5f1996-11-27 19:52:01 +00001#! /usr/bin/env python
Sjoerd Mullendere8a0a5c1996-02-14 10:40:03 +00002
Guido van Rossum4b8c6ea2000-02-04 15:39:30 +00003"""Mimification and unmimification of mail messages.
Sjoerd Mullendere8a0a5c1996-02-14 10:40:03 +00004
Guido van Rossum54f22ed2000-02-04 15:10:34 +00005Decode quoted-printable parts of a mail message or encode using
Sjoerd Mullendere8a0a5c1996-02-14 10:40:03 +00006quoted-printable.
7
8Usage:
Tim Peters07e99cb2001-01-14 23:47:14 +00009 mimify(input, output)
10 unmimify(input, output, decode_base64 = 0)
Sjoerd Mullendere8a0a5c1996-02-14 10:40:03 +000011to encode and decode respectively. Input and output may be the name
12of a file or an open file object. Only a readline() method is used
13on the input file, only a write() method is used on the output file.
14When using file names, the input and output file names may be the
15same.
16
17Interactive usage:
Tim Peters07e99cb2001-01-14 23:47:14 +000018 mimify.py -e [infile [outfile]]
19 mimify.py -d [infile [outfile]]
Sjoerd Mullendere8a0a5c1996-02-14 10:40:03 +000020to encode and decode respectively. Infile defaults to standard
21input and outfile to standard output.
Guido van Rossum4b8c6ea2000-02-04 15:39:30 +000022"""
Sjoerd Mullendere8a0a5c1996-02-14 10:40:03 +000023
# Configure
# Module-level settings read by the encoding and decoding functions below.
MAXLEN = 200    # a body line longer than this forces quoted-printable encoding
CHARSET = 'ISO-8859-1'  # charset name written out for mail with non-US-ASCII characters
QUOTE = '> '            # prefix string that marks quoted reply text
# End configure
29
Eric S. Raymond6b8c5282001-02-09 07:10:12 +000030import re
Sjoerd Mullendere8a0a5c1996-02-14 10:40:03 +000031
# Names exported by "from mimify import *".
__all__ = ["mimify","unmimify","mime_encode_header","mime_decode_header"]

# Content-Transfer-Encoding header that announces a quoted-printable body.
qp = re.compile('^content-transfer-encoding:\\s*quoted-printable', re.I)
# Content-Transfer-Encoding header that announces a base64 body.
base64_re = re.compile('^content-transfer-encoding:\\s*base64', re.I)
# Multipart Content-Type header.  Group 1 is the boundary parameter value:
# an optional opening quote is skipped and the value stops at ';', '"' or a
# newline.  re.S lets '.' cross the newlines of a folded header.
mp = re.compile('^content-type:.*multipart/.*boundary="?([^;"\n]*)', re.I|re.S)
# Content-Type header with a double-quoted charset of us-ascii or
# iso-8859-N.  Group 2 is the charset name; groups 1 and 3 are the text
# before and after it, so the header can be rebuilt with another charset.
chrset = re.compile('^(content-type:.*charset=")(us-ascii|iso-8859-[0-9]+)(".*)', re.I|re.S)
# Zero or more dashes immediately followed by a newline (an empty line or
# a line of dashes); the part readers treat such a line as the end of the
# header block.
he = re.compile('^-*\n')
# One quoted-printable escape "=XX"; group 1 is the two hex digits.
mime_code = re.compile('=([0-9a-f][0-9a-f])', re.I)
# One encoded word "=?iso-8859-1?q?...?="; group 1 is the encoded text.
mime_head = re.compile('=\\?iso-8859-1\\?q\\?([^? \t\n]+)\\?=', re.I)
# Subject header of a reply ("Subject: Re: ...").
repl = re.compile('^subject:\\s+re: ', re.I)
Sjoerd Mullendere8a0a5c1996-02-14 10:40:03 +000042
class File:
    """Minimal read-only file wrapper that stops at a MIME boundary.

    readline() is the only supported method.  A boundary line is never
    handed to the caller: it is stored in ``peek`` and from then on every
    readline() call reports end of file.
    """

    def __init__(self, file, boundary):
        self.file = file
        self.boundary = boundary
        self.peek = None

    def readline(self):
        """Return the next line, or '' at end of file or at a boundary."""
        if self.peek is not None:
            # a boundary was already seen; this part is exhausted
            return ''
        text = self.file.readline()
        if text and self.boundary:
            separators = (self.boundary + '\n', self.boundary + '--\n')
            if text in separators:
                # keep the boundary line so the caller can inspect it
                self.peek = text
                return ''
        return text
Sjoerd Mullendere8a0a5c1996-02-14 10:40:03 +000066
class HeaderFile:
    """Read logical header lines, joining folded continuation lines.

    Wraps any object with a readline() method.  One line of read-ahead is
    kept in ``peek`` so that the line following a header can be examined
    without being lost.
    """

    def __init__(self, file):
        self.file = file
        self.peek = None

    def readline(self):
        """Return the next header line with its continuation lines appended.

        A line matching the end-of-header pattern, or '' at end of file,
        is returned as is without looking ahead.
        """
        if self.peek is None:
            current = self.file.readline()
        else:
            # hand out the line that was read ahead by the previous call
            current, self.peek = self.peek, None
        if not current or he.match(current):
            return current
        while True:
            lookahead = self.file.readline()
            self.peek = lookahead
            # a continuation line starts with a space or a tab
            if not lookahead or lookahead[0] not in ' \t':
                return current
            current = current + lookahead
            self.peek = None
Sjoerd Mullendere8a0a5c1996-02-14 10:40:03 +000089
def mime_decode(line):
    """Decode a single line of quoted-printable text to 8bit.

    Each "=XX" escape (two hex digits) is replaced by the character with
    that code; everything else is copied through unchanged.
    """
    def _unescape(found):
        return chr(int(found.group(1), 16))

    return mime_code.sub(_unescape, line)
Sjoerd Mullendere8a0a5c1996-02-14 10:40:03 +0000102
def mime_decode_header(line):
    """Decode a header line to 8bit.

    Every encoded word matched by mime_head is replaced by its decoded
    text; the rest of the line is left untouched.
    """
    def _decode_word(found):
        # underscores stand for spaces and must be converted before the
        # =XX escapes are decoded
        payload = found.group(1).replace('_', ' ')
        return mime_decode(payload)

    return mime_head.sub(_decode_word, line)
Sjoerd Mullendere8a0a5c1996-02-14 10:40:03 +0000117
def unmimify_part(ifile, ofile, decode_base64 = 0):
    """Convert a quoted-printable part of a MIME mail message to 8bit.

    ifile must provide readline() and a ``boundary`` attribute (the File
    class in this module); ofile only needs write().  The header block is
    copied with encoded words decoded, the quoted-printable
    Content-Transfer-Encoding header is dropped, and the body is decoded.
    When decode_base64 is true, a base64 Content-Transfer-Encoding header
    is dropped as well and unquoted body lines are base64-decoded.
    Nested multipart parts are processed recursively.
    """
    import base64       # imported once here instead of once per body line

    multipart = None
    quoted_printable = 0
    is_base64 = 0
    # A boundary that starts with the reply-quote string means every line
    # of this part carries that prefix.  Compare against the real length
    # of QUOTE rather than a hard-coded 2 so QUOTE can be reconfigured.
    if ifile.boundary and ifile.boundary[:len(QUOTE)] == QUOTE:
        prefix = QUOTE
    else:
        prefix = ''

    # read header
    hfile = HeaderFile(ifile)
    while 1:
        line = hfile.readline()
        if not line:
            return
        if prefix and line[:len(prefix)] == prefix:
            line = line[len(prefix):]
            pref = prefix
        else:
            pref = ''
        line = mime_decode_header(line)
        if qp.match(line):
            quoted_printable = 1
            continue        # skip this header
        if decode_base64 and base64_re.match(line):
            is_base64 = 1
            continue        # skip this header too
        ofile.write(pref + line)
        mp_res = mp.match(line)
        if mp_res:
            multipart = '--' + mp_res.group(1)
        if he.match(line):
            # end of the header block
            break

    # read body
    while 1:
        line = ifile.readline()
        if not line:
            return
        # replace encoded words in the body by their (still encoded) text
        line = mime_head.sub('\\1', line)
        if prefix and line[:len(prefix)] == prefix:
            line = line[len(prefix):]
            pref = prefix
        else:
            pref = ''
        while multipart:
            if line == multipart + '--\n':
                # closing boundary: copy it and stop looking for parts
                ofile.write(pref + line)
                multipart = None
                line = None
                break
            if line == multipart + '\n':
                # boundary between parts: process the nested part, then
                # look at the boundary line that terminated it
                ofile.write(pref + line)
                nifile = File(ifile, multipart)
                unmimify_part(nifile, ofile, decode_base64)
                line = nifile.peek
                if not line:
                    # premature end of file
                    break
                continue
            # not a boundary between parts
            break
        if line and quoted_printable:
            # a trailing "=" is a soft line break: join with the next line
            while line[-2:] == '=\n':
                line = line[:-2]
                newline = ifile.readline()
                if newline[:len(QUOTE)] == QUOTE:
                    newline = newline[len(QUOTE):]
                line = line + newline
            line = mime_decode(line)
        if line and is_base64 and not pref:
            line = base64.decodestring(line)
        if line:
            ofile.write(pref + line)
Sjoerd Mullendere8a0a5c1996-02-14 10:40:03 +0000202
def unmimify(infile, outfile, decode_base64 = 0):
    """Convert quoted-printable parts of a MIME mail message to 8bit.

    infile and outfile may each be a file name or an open file object
    (readline() is used on the input, write() and flush() on the output).
    When both are the same file name, the original file is first renamed
    to ',<name>' in the same directory and then read from there.
    decode_base64 is passed on to unmimify_part().

    Files opened here are closed again before returning, also on error;
    file objects supplied by the caller are only flushed, never closed.
    """
    opened = []
    try:
        if isinstance(infile, str):
            ifile = open(infile)
            opened.append(ifile)
            if isinstance(outfile, str) and infile == outfile:
                import os
                d, f = os.path.split(infile)
                # keep the original under ',name'; ifile stays open on it
                os.rename(infile, os.path.join(d, ',' + f))
        else:
            ifile = infile
        if isinstance(outfile, str):
            ofile = open(outfile, 'w')
            opened.append(ofile)
        else:
            ofile = outfile
        nifile = File(ifile, None)
        unmimify_part(nifile, ofile, decode_base64)
        ofile.flush()
    finally:
        for handle in opened:
            handle.close()
Sjoerd Mullendere8a0a5c1996-02-14 10:40:03 +0000220
mime_char = re.compile('[=\177-\377]')  # quote these chars in body
# In a header "_" must be quoted too: inside an encoded word a bare "_"
# stands for a space, so a literal underscore would decode as a space.
mime_header_char = re.compile('[=?_\177-\377]') # quote these in header

def mime_encode(line, header):
    """Code a single line as quoted-printable.

    line is the text to encode.  If header is true, the extra characters
    that are special inside an encoded header word ("?" and "_") are
    quoted as well.  Returns the encoded text; encoded text of 75 or more
    characters is split with "=\\n" soft line breaks, never in the middle
    of an "=XX" escape.
    """
    if header:
        reg = mime_header_char
    else:
        reg = mime_char

    def _escape(found):
        return '=%02X' % ord(found.group(0))

    if line[:5] == 'From ':
        # quote 'From ' at the start of a line for stupid mailers
        encoded = '=46' + reg.sub(_escape, line[1:])
    else:
        encoded = reg.sub(_escape, line)

    pieces = []
    while len(encoded) >= 75:
        cut = 73
        # step back so the break does not fall inside an =XX escape
        while encoded[cut] == '=' or encoded[cut-1] == '=':
            cut = cut - 1
        cut = cut + 1
        pieces.append(encoded[:cut] + '=\n')
        encoded = encoded[cut:]
    pieces.append(encoded)
    return ''.join(pieces)
Sjoerd Mullendere8a0a5c1996-02-14 10:40:03 +0000255
# A word containing at least one 8-bit character.  Group 1 is the
# delimiter before the word (space, tab, "(" or start of line), group 2
# the word itself; the word must be followed by a space, tab, ")" or
# newline, which is not consumed.
mime_header = re.compile('([ \t(]|^)([-a-zA-Z0-9_+]*[\177-\377][-a-zA-Z0-9_+\177-\377]*)(?=[ \t)]|\n)')

def mime_encode_header(line):
    """Code a single header line as quoted-printable.

    Each word that contains an 8-bit character is replaced by an encoded
    word "=?CHARSET?Q?...?="; the rest of the line is kept as is.
    """
    def _encode_word(found):
        return '%s=?%s?Q?%s?=' % (found.group(1), CHARSET,
                                  mime_encode(found.group(2), 1))

    return mime_header.sub(_encode_word, line)
Sjoerd Mullendere8a0a5c1996-02-14 10:40:03 +0000271
mv = re.compile('^mime-version:', re.I)                 # Mime-Version header
cte = re.compile('^content-transfer-encoding:', re.I)   # any Content-Transfer-Encoding header
iso_char = re.compile('[\177-\377]')                    # a character that needs quoting

def mimify_part(ifile, ofile, is_mime):
    """Convert an 8bit part of a MIME mail message to quoted-printable.

    ifile must provide readline() (a File wrapper in this module); ofile
    only needs write().  is_mime is true when a Mime-Version header is
    already known to apply (nested parts are always called with 1).

    The whole header block and body of the part are read first so the
    headers can be rewritten according to what the body contains; nested
    multipart parts are then processed recursively.
    """
    has_cte = is_qp = is_base64 = 0
    multipart = None
    must_quote_body = must_quote_header = has_iso_chars = 0

    header = []
    header_end = ''
    message = []
    message_end = ''
    # read header
    hfile = HeaderFile(ifile)
    while 1:
        line = hfile.readline()
        if not line:
            break
        if not must_quote_header and iso_char.search(line):
            must_quote_header = 1
        if mv.match(line):
            is_mime = 1
        if cte.match(line):
            has_cte = 1
            if qp.match(line):
                is_qp = 1
            elif base64_re.match(line):
                is_base64 = 1
        mp_res = mp.match(line)
        if mp_res:
            multipart = '--' + mp_res.group(1)
        if he.match(line):
            header_end = line
            break
        header.append(line)

    # read body
    while 1:
        line = ifile.readline()
        if not line:
            break
        if multipart:
            if line == multipart + '--\n':
                message_end = line
                break
            if line == multipart + '\n':
                message_end = line
                break
        if is_base64:
            # base64 bodies are passed through untouched and not scanned
            message.append(line)
            continue
        if is_qp:
            # undo existing quoted-printable so the body is re-encoded
            # consistently; a trailing "=" is a soft line break
            while line[-2:] == '=\n':
                line = line[:-2]
                newline = ifile.readline()
                if newline[:len(QUOTE)] == QUOTE:
                    newline = newline[len(QUOTE):]
                line = line + newline
            line = mime_decode(line)
        message.append(line)
        if not has_iso_chars:
            if iso_char.search(line):
                has_iso_chars = must_quote_body = 1
        if not must_quote_body:
            if len(line) > MAXLEN:
                must_quote_body = 1

    # convert and output header and body
    for line in header:
        if must_quote_header:
            line = mime_encode_header(line)
        chrset_res = chrset.match(line)
        # A base64 body was never scanned, so has_iso_chars says nothing
        # about its content; leave the declared charset alone in that case
        # instead of relabelling it as us-ascii.
        if chrset_res and not is_base64:
            if has_iso_chars:
                # change us-ascii into iso-8859-1
                if chrset_res.group(2).lower() == 'us-ascii':
                    line = '%s%s%s' % (chrset_res.group(1),
                                       CHARSET,
                                       chrset_res.group(3))
            else:
                # change iso-8859-* into us-ascii
                line = '%sus-ascii%s' % chrset_res.group(1, 3)
        if has_cte and cte.match(line):
            line = 'Content-Transfer-Encoding: '
            if is_base64:
                line = line + 'base64\n'
            elif must_quote_body:
                line = line + 'quoted-printable\n'
            else:
                line = line + '7bit\n'
        ofile.write(line)
    if (must_quote_header or must_quote_body) and not is_mime:
        # the message had no MIME headers yet; add the minimal set
        ofile.write('Mime-Version: 1.0\n')
        ofile.write('Content-Type: text/plain; ')
        if has_iso_chars:
            ofile.write('charset="%s"\n' % CHARSET)
        else:
            ofile.write('charset="us-ascii"\n')
    if must_quote_body and not has_cte:
        ofile.write('Content-Transfer-Encoding: quoted-printable\n')
    ofile.write(header_end)

    for line in message:
        if must_quote_body:
            line = mime_encode(line, 0)
        ofile.write(line)
    ofile.write(message_end)

    line = message_end
    while multipart:
        if line == multipart + '--\n':
            # read bit after the end of the last part
            while 1:
                line = ifile.readline()
                if not line:
                    return
                if must_quote_body:
                    line = mime_encode(line, 0)
                ofile.write(line)
        if line == multipart + '\n':
            nifile = File(ifile, multipart)
            mimify_part(nifile, ofile, 1)
            # the boundary line that ended the nested part, if any
            line = nifile.peek
            if not line:
                # premature end of file
                break
            ofile.write(line)
            continue
        # unexpectedly no multipart separator--copy rest of file
        while 1:
            line = ifile.readline()
            if not line:
                return
            if must_quote_body:
                line = mime_encode(line, 0)
            ofile.write(line)
Sjoerd Mullendere8a0a5c1996-02-14 10:40:03 +0000410
def mimify(infile, outfile):
    """Convert 8bit parts of a MIME mail message to quoted-printable.

    infile and outfile may each be a file name or an open file object
    (readline() is used on the input, write() and flush() on the output).
    When both are the same file name, the original file is first renamed
    to ',<name>' in the same directory and then read from there.

    Files opened here are closed again before returning, also on error;
    file objects supplied by the caller are only flushed, never closed.
    """
    opened = []
    try:
        if isinstance(infile, str):
            ifile = open(infile)
            opened.append(ifile)
            if isinstance(outfile, str) and infile == outfile:
                import os
                d, f = os.path.split(infile)
                # keep the original under ',name'; ifile stays open on it
                os.rename(infile, os.path.join(d, ',' + f))
        else:
            ifile = infile
        if isinstance(outfile, str):
            ofile = open(outfile, 'w')
            opened.append(ofile)
        else:
            ofile = outfile
        nifile = File(ifile, None)
        mimify_part(nifile, ofile, 0)
        ofile.flush()
    finally:
        for handle in opened:
            handle.close()
Sjoerd Mullendere8a0a5c1996-02-14 10:40:03 +0000428
429import sys
# Command-line driver: runs when executed as a script, or when imported by
# a program whose argv[0] is exactly 'mimify'.
if __name__ == '__main__' or (len(sys.argv) > 0 and sys.argv[0] == 'mimify'):
    import getopt
    usage = 'Usage: mimify [-l len] -[ed] [infile [outfile]]'

    decode_base64 = 0
    try:
        opts, args = getopt.getopt(sys.argv[1:], 'l:edb')
    except getopt.error:
        # unknown option or missing argument to -l: show the usage line
        # instead of a traceback
        print(usage)
        sys.exit(1)
    if len(args) not in (0, 1, 2):
        print(usage)
        sys.exit(1)
    # exactly one of -e / -d is required, and -b only makes sense with -d
    if (('-e', '') in opts) == (('-d', '') in opts) or \
       ((('-b', '') in opts) and (('-d', '') not in opts)):
        print(usage)
        sys.exit(1)
    for o, a in opts:
        if o == '-e':
            encode = mimify
        elif o == '-d':
            encode = unmimify
        elif o == '-l':
            try:
                MAXLEN = int(a)
            except (ValueError, OverflowError):
                print(usage)
                sys.exit(1)
        elif o == '-b':
            decode_base64 = 1
    if len(args) == 0:
        encode_args = (sys.stdin, sys.stdout)
    elif len(args) == 1:
        encode_args = (args[0], sys.stdout)
    else:
        encode_args = (args[0], args[1])
    if decode_base64:
        # only unmimify() accepts the extra argument; -b without -d was
        # rejected above
        encode_args = encode_args + (decode_base64,)
    encode(*encode_args)