blob: 4c62c2442d9fbdb93b8994f5e5dbe76c6312d53e [file] [log] [blame]
Guido van Rossumf06ee5f1996-11-27 19:52:01 +00001#! /usr/bin/env python
Sjoerd Mullendere8a0a5c1996-02-14 10:40:03 +00002
'''Mimification and unmimification of mail messages.

Decode quoted-printable parts of a mail message or encode using
quoted-printable.

Usage:
        mimify(input, output)
        unmimify(input, output, decode_base64 = 0)
to encode and decode respectively.  Input and output may be the name
of a file or an open file object.  Only a readline() method is used
on the input file; only the write() and flush() methods are used on
the output file.  When using file names, the input and output file
names may be the same.

Interactive usage:
        mimify.py -e [-l len] [infile [outfile]]
        mimify.py -d [-b] [infile [outfile]]
to encode and decode respectively.  Infile defaults to standard
input and outfile to standard output.  The -l option sets the line
length above which a message body is encoded as quoted-printable;
the -b option makes decoding also decode base64 parts.
'''
23
# Configure

# Body lines longer than this force the body to be encoded as
# quoted-printable.
MAXLEN = 200

# Default charset for non-US-ASCII mail.
CHARSET = 'ISO-8859-1'

# String that replies are quoted with.
QUOTE = '> '

# End configure
29
30import regex, regsub, string
31
Guido van Rossum69155681996-06-10 19:04:02 +000032qp = regex.compile('^content-transfer-encoding:[ \t]*quoted-printable',
Sjoerd Mullendere8a0a5c1996-02-14 10:40:03 +000033 regex.casefold)
Guido van Rossuma3d9e021997-04-11 15:22:56 +000034base64_re = regex.compile('^content-transfer-encoding:[ \t]*base64',
Guido van Rossum69155681996-06-10 19:04:02 +000035 regex.casefold)
Sjoerd Mullendere8a0a5c1996-02-14 10:40:03 +000036mp = regex.compile('^content-type:[\000-\377]*multipart/[\000-\377]*boundary="?\\([^;"\n]*\\)',
37 regex.casefold)
38chrset = regex.compile('^\\(content-type:.*charset="\\)\\(us-ascii\\|iso-8859-[0-9]+\\)\\("[\000-\377]*\\)',
39 regex.casefold)
40he = regex.compile('^-*$')
41mime_code = regex.compile('=\\([0-9a-f][0-9a-f]\\)', regex.casefold)
Guido van Rossum88bb8081997-08-14 14:10:37 +000042mime_head = regex.compile('=\\?iso-8859-1\\?q\\?\\([^? \t\n]+\\)\\?=',
Sjoerd Mullendere8a0a5c1996-02-14 10:40:03 +000043 regex.casefold)
44repl = regex.compile('^subject:[ \t]+re: ', regex.casefold)
45
class File:
    '''Minimal file-like wrapper that stops at a multipart boundary.

    readline() is the only method provided.  It hands out the lines of
    the wrapped file until it reads a line that is equal to the boundary
    or to the boundary followed by "--".  That line is kept in
    self.peek, and from then on readline() returns the empty string.'''

    def __init__(self, file, boundary):
        self.file = file
        self.boundary = boundary
        # the boundary line that ended this part, once it has been read
        self.peek = None

    def readline(self):
        '''Return the next line, or '' at end of file or at a boundary.'''
        if self.peek is not None:
            # the boundary has already been seen: stay at "end of file"
            return ''
        line = self.file.readline()
        if line and self.boundary:
            sep = self.boundary
            if line in (sep + '\n', sep + '--\n'):
                self.peek = line
                return ''
        return line
70
class HeaderFile:
    '''File-like wrapper that returns complete header lines.

    readline() joins a header line with the continuation lines that
    follow it (lines starting with a space or a tab) and returns the
    result as one string.'''

    def __init__(self, file):
        self.file = file
        # line read ahead by the previous readline() call, if any
        self.peek = None

    def readline(self):
        '''Return the next header line with its continuation lines.'''
        # Start with the line left over from the previous call.
        line = self.peek
        if line is None:
            line = self.file.readline()
        else:
            self.peek = None
        # End of file and the line that ends the header are passed on
        # unchanged, without reading ahead.
        if not line or he.match(line) >= 0:
            return line
        # Append lines for as long as they are continuation lines; the
        # first line that is not one is saved for the next call.
        ahead = self.file.readline()
        while len(ahead) != 0 and (ahead[0] == ' ' or ahead[0] == '\t'):
            line = line + ahead
            ahead = self.file.readline()
        self.peek = ahead
        return line
93
def mime_decode(line):
    '''Decode a single line of quoted-printable text to 8bit.

    Every =XX escape (XX being two hex digits) is replaced by the
    character with that code; all other text is copied unchanged.'''
    decoded = ''
    pos = mime_code.search(line)
    while pos >= 0:
        char = chr(string.atoi(mime_code.group(1), 16))
        decoded = decoded + line[:pos] + char
        # skip the three characters of the escape
        line = line[pos+3:]
        pos = mime_code.search(line)
    return decoded + line
105
def mime_decode_header(line):
    '''Decode a header line to 8bit.

    Every =?iso-8859-1?q?...?= encoded word in the line is replaced by
    its decoded text; the rest of the line is copied unchanged.'''
    decoded = ''
    start = mime_head.search(line)
    while start >= 0:
        whole, text = mime_head.group(0, 1)
        # underscores stand for spaces; this has to be done before the
        # =XX conversion so that an encoded underscore survives
        text = string.join(string.split(text, '_'), ' ')
        decoded = decoded + line[:start] + mime_decode(text)
        line = line[start + len(whole):]
        start = mime_head.search(line)
    return decoded + line
119
def unmimify_part(ifile, ofile, decode_base64 = 0):
    '''Convert a quoted-printable part of a MIME mail message to 8bit.

    ifile is a File object positioned at the start of the header of the
    part, ofile is an object with a write() method.  The header is
    copied with its encoded words decoded; a Content-Transfer-Encoding
    header that announces quoted-printable (or base64, if decode_base64
    is true) is dropped.  The body is then copied and decoded
    accordingly.  Parts contained in a multipart part are handled by
    recursive calls.'''
    multipart = None
    quoted_printable = 0
    is_base64 = 0
    is_repl = 0
    # A boundary that starts with the quote string means that this part
    # is quoted text; the quote string is then stripped from the lines
    # for processing and put back when they are written.
    if ifile.boundary and ifile.boundary[:len(QUOTE)] == QUOTE:
        prefix = QUOTE
    else:
        prefix = ''

    # read header
    hfile = HeaderFile(ifile)
    while 1:
        line = hfile.readline()
        if not line:
            return
        if prefix and line[:len(prefix)] == prefix:
            line = line[len(prefix):]
            pref = prefix
        else:
            pref = ''
        line = mime_decode_header(line)
        if qp.match(line) >= 0:
            quoted_printable = 1
            continue        # skip this header
        if decode_base64 and base64_re.match(line) >= 0:
            is_base64 = 1
            continue        # skip this header as well
        ofile.write(pref + line)
        if not prefix and repl.match(line) >= 0:
            # we're dealing with a reply message
            is_repl = 1
        if mp.match(line) >= 0:
            multipart = '--' + mp.group(1)
        if he.match(line) >= 0:
            break
    if is_repl and (quoted_printable or multipart):
        is_repl = 0

    # read body
    while 1:
        line = ifile.readline()
        if not line:
            return
        line = regsub.gsub(mime_head, '\\1', line)
        if prefix and line[:len(prefix)] == prefix:
            line = line[len(prefix):]
            pref = prefix
        else:
            pref = ''
        # Disabled heuristic, the only user of is_repl:
##      if is_repl and len(line) >= 4 and line[:4] == QUOTE+'--' and line[-3:] != '--\n':
##          multipart = line[:-1]
        while multipart:
            if line == multipart + '--\n':
                # end of the last part
                ofile.write(pref + line)
                multipart = None
                line = None
                break
            if line == multipart + '\n':
                # start of a part: process it, then look at the
                # boundary line that ended it
                ofile.write(pref + line)
                nifile = File(ifile, multipart)
                unmimify_part(nifile, ofile, decode_base64)
                line = nifile.peek
                continue
            # not a boundary between parts
            break
        if line and quoted_printable:
            # a line ending in '=' is continued on the next line
            while line[-2:] == '=\n':
                line = line[:-2]
                newline = ifile.readline()
                if newline[:len(QUOTE)] == QUOTE:
                    newline = newline[len(QUOTE):]
                line = line + newline
            line = mime_decode(line)
        if line and is_base64 and not pref:
            import base64
            line = base64.decodestring(line)
        if line:
            ofile.write(pref + line)
200
def unmimify(infile, outfile, decode_base64 = 0):
    '''Convert quoted-printable parts of a MIME mail message to 8bit.

    infile and outfile are each either a file name or an open file
    object.  When both are the same file name, the input file is
    renamed by putting a comma in front of its name before the output
    file is created.  If decode_base64 is true, base64 parts are
    decoded as well.  Files opened here are closed again before
    returning; file objects passed in are only flushed.'''
    opened = []         # the files opened here, which we must close
    try:
        if type(infile) == type(''):
            ifile = open(infile)
            opened.append(ifile)
            if type(outfile) == type('') and infile == outfile:
                # move the input out of the way of the output
                import os
                d, f = os.path.split(infile)
                os.rename(infile, os.path.join(d, ',' + f))
        else:
            ifile = infile
        if type(outfile) == type(''):
            ofile = open(outfile, 'w')
            opened.append(ofile)
        else:
            ofile = outfile
        nifile = File(ifile, None)
        unmimify_part(nifile, ofile, decode_base64)
        ofile.flush()
    finally:
        for fp in opened:
            fp.close()
218
219mime_char = regex.compile('[=\240-\377]') # quote these chars in body
220mime_header_char = regex.compile('[=?\240-\377]') # quote these in header
221
def mime_encode(line, header):
    '''Code a single line as quoted-printable.

    If header is set, quote some extra characters.  The result is
    broken into pieces joined by "=\\n" for as long as what remains is
    75 characters or longer.'''
    if header:
        special = mime_header_char
    else:
        special = mime_char

    quoted = ''
    if line[:5] == 'From ':
        # quote 'From ' at the start of a line for stupid mailers
        quoted = string.upper('=%02x' % ord(line[0]))
        line = line[1:]
    pos = special.search(line)
    while pos >= 0:
        escape = string.upper('=%02x' % ord(line[pos]))
        quoted = quoted + line[:pos] + escape
        line = line[pos+1:]
        pos = special.search(line)
    line = quoted + line

    folded = ''
    while len(line) >= 75:
        cut = 73
        # move back so that an =XX escape is not split
        while line[cut] == '=' or line[cut-1] == '=':
            cut = cut - 1
        cut = cut + 1
        folded = folded + line[:cut] + '=\n'
        line = line[cut:]
    return folded + line
252
Guido van Rossum68ded211996-05-28 19:52:40 +0000253mime_header = regex.compile('\\([ \t(]\\|^\\)\\([-a-zA-Z0-9_+]*[\240-\377][-a-zA-Z0-9_+\240-\377]*\\)\\([ \t)]\\|$\\)')
Sjoerd Mullendere8a0a5c1996-02-14 10:40:03 +0000254
def mime_encode_header(line):
    '''Code a single header line as quoted-printable.

    Every word that contains 8-bit characters is replaced by an
    encoded word of the form =?CHARSET?Q?...?=; the delimiters around
    the word and the rest of the line are copied unchanged.'''
    encoded = ''
    start = mime_header.search(line)
    while start >= 0:
        # fetch the groups before mime_encode does its own searching
        whole, before, word, after = mime_header.group(0, 1, 2, 3)
        word = '=?' + CHARSET + '?Q?' + mime_encode(word, 1) + '?='
        encoded = encoded + line[:start] + before + word + after
        line = line[start+len(whole):]
        start = mime_header.search(line)
    return encoded + line
268
269mv = regex.compile('^mime-version:', regex.casefold)
270cte = regex.compile('^content-transfer-encoding:', regex.casefold)
271iso_char = regex.compile('[\240-\377]')
272
def mimify_part(ifile, ofile, is_mime):
    '''Convert an 8bit part of a MIME mail message to quoted-printable.

    ifile is an object with a readline() method that is positioned at
    the start of the header of the part, ofile is an object with a
    write() method, and is_mime is true if the message is already known
    to be a MIME message.  The header and the body of the part are read
    completely before anything is written, since the header that is
    written depends on what the body contains.  Parts contained in a
    multipart part are handled by recursive calls.  Input that ends
    early or lacks the announced boundary is copied as far as it goes.'''
    has_cte = is_qp = is_base64 = 0
    multipart = None
    must_quote_body = must_quote_header = has_iso_chars = 0

    header = []
    header_end = ''
    message = []
    message_end = ''
    # read header
    hfile = HeaderFile(ifile)
    while 1:
        line = hfile.readline()
        if not line:
            break
        if not must_quote_header and iso_char.search(line) >= 0:
            must_quote_header = 1
        if mv.match(line) >= 0:
            is_mime = 1
        if cte.match(line) >= 0:
            has_cte = 1
            if qp.match(line) >= 0:
                is_qp = 1
            elif base64_re.match(line) >= 0:
                is_base64 = 1
        if mp.match(line) >= 0:
            multipart = '--' + mp.group(1)
        if he.match(line) >= 0:
            header_end = line
            break
        header.append(line)

    # read body
    while 1:
        line = ifile.readline()
        if not line:
            break
        if multipart:
            if line == multipart + '--\n':
                message_end = line
                break
            if line == multipart + '\n':
                message_end = line
                break
        if is_base64:
            # base64 data is passed through untouched
            message.append(line)
            continue
        if is_qp:
            # undo the existing encoding first; a line ending in '='
            # is continued on the next line
            while line[-2:] == '=\n':
                line = line[:-2]
                newline = ifile.readline()
                if newline[:len(QUOTE)] == QUOTE:
                    newline = newline[len(QUOTE):]
                line = line + newline
            line = mime_decode(line)
        message.append(line)
        if not has_iso_chars:
            if iso_char.search(line) >= 0:
                has_iso_chars = must_quote_body = 1
        if not must_quote_body:
            if len(line) > MAXLEN:
                must_quote_body = 1

    # convert and output header and body
    for line in header:
        if must_quote_header:
            line = mime_encode_header(line)
        if chrset.match(line) >= 0:
            if has_iso_chars:
                # change us-ascii into iso-8859-1
                if string.lower(chrset.group(2)) == 'us-ascii':
                    line = chrset.group(1) + \
                           CHARSET + chrset.group(3)
            else:
                # change iso-8859-* into us-ascii
                line = chrset.group(1) + 'us-ascii' + chrset.group(3)
        if has_cte and cte.match(line) >= 0:
            line = 'Content-Transfer-Encoding: '
            if is_base64:
                line = line + 'base64\n'
            elif must_quote_body:
                line = line + 'quoted-printable\n'
            else:
                line = line + '7bit\n'
        ofile.write(line)
    if (must_quote_header or must_quote_body) and not is_mime:
        # the message was not a MIME message yet: add the MIME headers
        ofile.write('Mime-Version: 1.0\n')
        ofile.write('Content-Type: text/plain; ')
        if has_iso_chars:
            ofile.write('charset="%s"\n' % CHARSET)
        else:
            ofile.write('charset="us-ascii"\n')
    if must_quote_body and not has_cte:
        ofile.write('Content-Transfer-Encoding: quoted-printable\n')
    ofile.write(header_end)

    for line in message:
        if must_quote_body:
            line = mime_encode(line, 0)
        ofile.write(line)
    ofile.write(message_end)

    # process the parts of a multipart message; message_end is the
    # boundary line that ended the text before the first part
    line = message_end
    while multipart and line == multipart + '\n':
        nifile = File(ifile, multipart)
        mimify_part(nifile, ofile, 1)
        line = nifile.peek
        if not line:
            # premature end of file: no boundary ended the part
            return
        ofile.write(line)
    if multipart:
        # Either the end of the last part was reached, or there
        # unexpectedly was no boundary line at all; in both cases copy
        # the rest of the input.
        while 1:
            line = ifile.readline()
            if not line:
                return
            if must_quote_body:
                line = mime_encode(line, 0)
            ofile.write(line)
393
def mimify(infile, outfile):
    '''Convert 8bit parts of a MIME mail message to quoted-printable.

    infile and outfile are each either a file name or an open file
    object.  When both are the same file name, the input file is
    renamed by putting a comma in front of its name before the output
    file is created.  Files opened here are closed again before
    returning; file objects passed in are only flushed.'''
    opened = []         # the files opened here, which we must close
    try:
        if type(infile) == type(''):
            ifile = open(infile)
            opened.append(ifile)
            if type(outfile) == type('') and infile == outfile:
                # move the input out of the way of the output
                import os
                d, f = os.path.split(infile)
                os.rename(infile, os.path.join(d, ',' + f))
        else:
            ifile = infile
        if type(outfile) == type(''):
            ofile = open(outfile, 'w')
            opened.append(ofile)
        else:
            ofile = outfile
        nifile = File(ifile, None)
        mimify_part(nifile, ofile, 0)
        ofile.flush()
    finally:
        for fp in opened:
            fp.close()
411
412import sys
413if __name__ == '__main__' or (len(sys.argv) > 0 and sys.argv[0] == 'mimify'):
414 import getopt
415 usage = 'Usage: mimify [-l len] -[ed] [infile [outfile]]'
416
Guido van Rossuma3d9e021997-04-11 15:22:56 +0000417 decode_base64 = 0
418 opts, args = getopt.getopt(sys.argv[1:], 'l:edb')
Sjoerd Mullendere8a0a5c1996-02-14 10:40:03 +0000419 if len(args) not in (0, 1, 2):
420 print usage
421 sys.exit(1)
Guido van Rossuma3d9e021997-04-11 15:22:56 +0000422 if (('-e', '') in opts) == (('-d', '') in opts) or \
423 ((('-b', '') in opts) and (('-d', '') not in opts)):
Sjoerd Mullendere8a0a5c1996-02-14 10:40:03 +0000424 print usage
425 sys.exit(1)
426 for o, a in opts:
427 if o == '-e':
428 encode = mimify
429 elif o == '-d':
430 encode = unmimify
431 elif o == '-l':
432 try:
433 MAXLEN = string.atoi(a)
434 except:
435 print usage
436 sys.exit(1)
Guido van Rossuma3d9e021997-04-11 15:22:56 +0000437 elif o == '-b':
438 decode_base64 = 1
Sjoerd Mullendere8a0a5c1996-02-14 10:40:03 +0000439 if len(args) == 0:
Guido van Rossuma3d9e021997-04-11 15:22:56 +0000440 encode_args = (sys.stdin, sys.stdout)
Sjoerd Mullendere8a0a5c1996-02-14 10:40:03 +0000441 elif len(args) == 1:
Guido van Rossuma3d9e021997-04-11 15:22:56 +0000442 encode_args = (args[0], sys.stdout)
Sjoerd Mullendere8a0a5c1996-02-14 10:40:03 +0000443 else:
Guido van Rossuma3d9e021997-04-11 15:22:56 +0000444 encode_args = (args[0], args[1])
445 if decode_base64:
446 encode_args = encode_args + (decode_base64,)
447 apply(encode, encode_args)