blob: c8af62385d0505a036512e016f1d8372bb1c5631 [file] [log] [blame]
Guido van Rossumf06ee5f1996-11-27 19:52:01 +00001#! /usr/bin/env python
Sjoerd Mullendere8a0a5c1996-02-14 10:40:03 +00002
3'''Mimification and unmimification of mail messages.
4
5decode quoted-printable parts of a mail message or encode using
6quoted-printable.
7
8Usage:
9 mimify(input, output)
10 unmimify(input, output)
11to encode and decode respectively. Input and output may be the name
12of a file or an open file object. Only a readline() method is used
13on the input file, only a write() method is used on the output file.
14When using file names, the input and output file names may be the
15same.
16
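Interactive usage:
	mimify.py -e [-l len] [infile [outfile]]
	mimify.py -d [-b] [infile [outfile]]
to encode and decode respectively.  Infile defaults to standard
input and outfile to standard output.  With -e, -l len sets the line
length above which a body is encoded as quoted-printable (default 200).
With -d, -b also decodes base64-encoded parts.
'''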
22'''
23
# Configure
# A body containing a line longer than MAXLEN characters is encoded as
# quoted-printable by mimify (the -l command line option overrides it).
MAXLEN = 200 # if lines longer than this, encode as quoted-printable
# Charset name written into generated encoded words and Content-Type
# headers when the mail contains non-US-ASCII characters.
CHARSET = 'ISO-8859-1' # default charset for non-US-ASCII mail
# Prefix that marks a quoted (reply) line; it is stripped before a line
# is examined and put back when the line is written.
QUOTE = '> ' # string replies are quoted with
# End configure
29
30import regex, regsub, string
31
# Patterns used to recognize header lines.  Those compiled with
# regex.casefold are meant to match without regard to letter case.

# Content-Transfer-Encoding header announcing quoted-printable.
qp = regex.compile('^content-transfer-encoding:[ \t]*quoted-printable',
                   regex.casefold)
# Content-Transfer-Encoding header announcing base64.
base64_re = regex.compile('^content-transfer-encoding:[ \t]*base64',
                          regex.casefold)
# Content-Type header of a multipart message; group 1 is the boundary
# parameter (an optional opening quote is skipped, the value stops at
# ';', '"' or a newline).
mp = regex.compile('^content-type:[\000-\377]*multipart/[\000-\377]*boundary="?\\([^;"\n]*\\)',
                   regex.casefold)
# Content-Type header with a quoted us-ascii or iso-8859-N charset:
# group 1 is everything up to and including the opening quote, group 2
# the charset name, group 3 the closing quote and the rest of the line.
chrset = regex.compile('^\\(content-type:.*charset="\\)\\(us-ascii\\|iso-8859-[0-9]+\\)\\("[\000-\377]*\\)',
                       regex.casefold)
# Line that ends a header: zero or more dashes and nothing else, so the
# empty line matches as well.
he = regex.compile('^-*$')
# Quoted-printable escape '=XX'; group 1 is the two hexadecimal digits.
mime_code = regex.compile('=\\([0-9a-f][0-9a-f]\\)', regex.casefold)
# Encoded word '=?iso-8859-1?q?...?=' in a header; group 1 is the
# encoded text between the last two question marks.
mime_head = regex.compile('=\\?iso-8859-1\\?q\\?\\([^?]+\\)\\?=',
                          regex.casefold)
# Subject header of a reply ('Subject: Re: ...').
repl = regex.compile('^subject:[ \t]+re: ', regex.casefold)
45
class File:
    '''Minimal file-like reader that stops at a MIME boundary.

    Lines are fetched from the wrapped file one at a time.  When a line
    equals the boundary (or the boundary followed by '--') it is not
    returned; it is stored in the peek attribute and '' is returned
    from then on, so the part looks like a file that has ended.
    readline() is the only method provided.'''

    def __init__(self, file, boundary):
        # boundary is the complete boundary line without its newline,
        # or a false value when no boundary has to be detected
        self.boundary = boundary
        self.file = file
        self.peek = None

    def readline(self):
        # once a boundary line has been seen this part is exhausted
        if self.peek is not None:
            return ''
        text = self.file.readline()
        if text and self.boundary:
            marker = self.boundary
            if text in (marker + '\n', marker + '--\n'):
                # keep the boundary line for the caller to inspect
                self.peek = text
                return ''
        return text
70
class HeaderFile:
    '''Reader that hands out complete header lines.

    A header line is returned together with all continuation lines
    (lines starting with a space or a tab) that follow it.  To notice
    the end of a header line one line has to be read ahead; that line
    is remembered in the peek attribute and used by the next call.'''

    def __init__(self, file):
        self.file = file
        self.peek = None

    def readline(self):
        # start with the line read ahead by the previous call, if any
        pending = self.peek
        if pending is None:
            header = self.file.readline()
        else:
            self.peek = None
            header = pending
        # end of file and the line ending the header are passed on
        # as they are, without reading ahead
        if not header or he.match(header) >= 0:
            return header
        while 1:
            following = self.file.readline()
            self.peek = following
            if len(following) == 0 or following[0] not in ' \t':
                # not a continuation line: leave it for the next call
                return header
            header = header + following
            self.peek = None
93
def mime_decode(line):
    '''Return line with every quoted-printable =XX escape replaced by
    the character whose code is the hexadecimal number XX.'''
    pieces = []
    rest = line
    pos = mime_code.search(rest)
    while pos >= 0:
        # text before the escape, then the character it stands for
        pieces.append(rest[:pos])
        pieces.append(chr(string.atoi(mime_code.group(1), 16)))
        # an escape is always three characters long
        rest = rest[pos + 3:]
        pos = mime_code.search(rest)
    pieces.append(rest)
    return string.joinfields(pieces, '')
105
def mime_decode_header(line):
    '''Decode the encoded words in a header line to 8bit.

    Every encoded word found by the module-level mime_head pattern
    ('=?iso-8859-1?q?...?=') is replaced by its decoded text; all other
    text is copied unchanged.  Inside an encoded word an underscore
    stands for a space (RFC 2047, section 4.2), so underscores are
    turned into spaces before the =XX escapes are decoded.  A literal
    underscore written as =5F is therefore preserved.

    line -- a header line (continuation lines included).
    Returns the decoded line.'''
    newline = ''
    while 1:
        i = mime_head.search(line)
        if i < 0:
            break
        # match[0] is the whole encoded word, match[1] its payload
        match = mime_head.group(0, 1)
        # convert underscores to spaces (before =XX conversion!)
        text = string.joinfields(string.splitfields(match[1], '_'), ' ')
        newline = newline + line[:i] + mime_decode(text)
        line = line[i + len(match[0]):]
    return newline + line
117
def unmimify_part(ifile, ofile, decode_base64 = 0):
    '''Convert a quoted-printable part of a MIME mail message to 8bit.

    ifile         -- File object positioned at the first header line of
                     the part; its readline() method and its boundary
                     attribute are used.
    ofile         -- object whose write() method receives the result.
    decode_base64 -- if true, parts announced as base64 are decoded
                     as well.

    The header is copied with its encoded words decoded; a
    quoted-printable Content-Transfer-Encoding header (and, when
    decode_base64 is set, a base64 one) is dropped.  The body is then
    copied, decoded when required.  For a multipart message the
    function calls itself for every nested part.  Lines starting with
    QUOTE are handled when the boundary itself starts with QUOTE: the
    prefix is stripped before a line is examined and put back when the
    line is written.'''
    multipart = None
    quoted_printable = 0
    is_base64 = 0
    is_repl = 0
    # use len(QUOTE) rather than a fixed 2 so that a reconfigured
    # QUOTE keeps working
    if ifile.boundary and ifile.boundary[:len(QUOTE)] == QUOTE:
        prefix = QUOTE
    else:
        prefix = ''

    # read header
    hfile = HeaderFile(ifile)
    while 1:
        line = hfile.readline()
        if not line:
            return
        if prefix and line[:len(prefix)] == prefix:
            line = line[len(prefix):]
            pref = prefix
        else:
            pref = ''
        line = mime_decode_header(line)
        if qp.match(line) >= 0:
            quoted_printable = 1
            continue        # skip this header
        if decode_base64 and base64_re.match(line) >= 0:
            is_base64 = 1
            continue        # skip this header as well
        ofile.write(pref + line)
        if not prefix and repl.match(line) >= 0:
            # we're dealing with a reply message
            is_repl = 1
        if mp.match(line) >= 0:
            # boundary lines consist of '--' followed by the parameter
            multipart = '--' + mp.group(1)
        if he.match(line) >= 0:
            # end of the header (the line has already been written)
            break
    if is_repl and (quoted_printable or multipart):
        is_repl = 0

    # read body
    while 1:
        line = ifile.readline()
        if not line:
            return
        # strip the '=?iso-8859-1?q?' ... '?=' wrapper from encoded
        # words in the body, keeping only their text (group 1)
        line = regsub.gsub(mime_head, '\\1', line)
        if prefix and line[:len(prefix)] == prefix:
            line = line[len(prefix):]
            pref = prefix
        else:
            pref = ''
##      if is_repl and len(line) >= 4 and line[:4] == QUOTE+'--' and line[-3:] != '--\n':
##          multipart = line[:-1]
        while multipart:
            if line == multipart + '--\n':
                # closing boundary: no more parts follow
                ofile.write(pref + line)
                multipart = None
                line = None
                break
            if line == multipart + '\n':
                ofile.write(pref + line)
                nifile = File(ifile, multipart)
                unmimify_part(nifile, ofile, decode_base64)
                # the boundary line that ended the nested part, or
                # None if the input ended first; None matches neither
                # test above and so leaves this loop
                line = nifile.peek
                continue
            # not a boundary between parts
            break
        if line and quoted_printable:
            # a line ending in '=' is continued on the next line
            while line[-2:] == '=\n':
                line = line[:-2]
                newline = ifile.readline()
                if newline[:len(QUOTE)] == QUOTE:
                    newline = newline[len(QUOTE):]
                line = line + newline
            line = mime_decode(line)
        if line and is_base64 and not pref:
            # only lines without a quote prefix are base64-decoded
            import base64
            line = base64.decodestring(line)
        if line:
            ofile.write(pref + line)
198
def unmimify(infile, outfile, decode_base64 = 0):
    '''Convert quoted-printable parts of a MIME mail message to 8bit.

    infile        -- name of the input file, or an object with a
                     readline() method.
    outfile       -- name of the output file, or an object with a
                     write() method.
    decode_base64 -- if true, base64 parts are decoded as well.

    When infile and outfile are the same file name, the original file
    is first renamed to the same name with a ',' in front of it.
    Files opened here because a name was given are closed again before
    returning, also when the conversion fails; file objects supplied
    by the caller are flushed (the output) but left open.'''
    opened = []         # files opened by this call
    try:
        if type(infile) == type(''):
            ifile = open(infile)
            opened.append(ifile)
            if type(outfile) == type('') and infile == outfile:
                import os
                d, f = os.path.split(infile)
                os.rename(infile, os.path.join(d, ',' + f))
        else:
            ifile = infile
        if type(outfile) == type(''):
            ofile = open(outfile, 'w')
            opened.append(ofile)
        else:
            ofile = outfile
        nifile = File(ifile, None)
        unmimify_part(nifile, ofile, decode_base64)
        ofile.flush()
    finally:
        for fp in opened:
            fp.close()
216
# Characters that mime_encode replaces by an =XX escape: '=' and the
# characters \240-\377 in a body, and additionally '?' in a header.
mime_char = regex.compile('[=\240-\377]') # quote these chars in body
mime_header_char = regex.compile('[=?\240-\377]') # quote these in header
219
def mime_encode(line, header):
    '''Code a single line as quoted-printable.

    line   -- the text to encode.
    header -- if true, the text is the contents of an encoded word in
              a header: '?' and '_' are escaped too.

    Characters matched by mime_char (or mime_header_char for a header)
    are replaced by '=XX' with upper case hexadecimal digits, and the
    'F' of a leading 'From ' is escaped as well.  In a header an
    unescaped underscore would be read back as a space (RFC 2047,
    section 4.2), so it is written as '=5F'.  A result of 75 or more
    characters is split with soft line breaks ('=' followed by a
    newline), never in the middle of an escape.
    Returns the encoded text.'''
    if header:
        reg = mime_header_char
    else:
        reg = mime_char
    newline = ''
    if len(line) >= 5 and line[:5] == 'From ':
        # quote 'From ' at the start of a line for stupid mailers
        newline = string.upper('=%02x' % ord('F'))
        line = line[1:]
    while 1:
        i = reg.search(line)
        if i < 0:
            break
        newline = newline + line[:i] + \
                  string.upper('=%02x' % ord(line[i]))
        line = line[i+1:]
    line = newline + line
    if header:
        # the escapes added above contain no underscores, so every
        # underscore left is a literal one from the input
        line = string.joinfields(string.splitfields(line, '_'), '=5F')

    # NOTE(review): soft line breaks are also inserted when header is
    # set; an encoded word should not contain them -- confirm whether
    # header words this long can occur.
    newline = ''
    while len(line) >= 75:
        i = 73
        # move the split point back so that no =XX escape is cut in two
        while line[i] == '=' or line[i-1] == '=':
            i = i - 1
        i = i + 1
        newline = newline + line[:i] + '=\n'
        line = line[i:]
    return newline + line
250
# A word in a header that has to be encoded.  Group 1 is the delimiter
# in front of it (space, tab, '(' or the start of the text), group 2 the
# word itself (letters, digits, '-', '_', '+' and at least one character
# in the range \240-\377), group 3 the delimiter after it (space, tab,
# ')' or the end of the text).
mime_header = regex.compile('\\([ \t(]\\|^\\)\\([-a-zA-Z0-9_+]*[\240-\377][-a-zA-Z0-9_+\240-\377]*\\)\\([ \t)]\\|$\\)')
Sjoerd Mullendere8a0a5c1996-02-14 10:40:03 +0000252
def mime_encode_header(line):
    '''Return the header line with every word found by the mime_header
    pattern turned into a '=?' CHARSET '?Q?' ... '?=' encoded word.
    The delimiters around the word and all other text are kept.'''
    done = ''
    rest = line
    while 1:
        start = mime_header.search(rest)
        if start < 0:
            return done + rest
        # fetch all groups of this match before encoding the word
        whole = mime_header.group(0)
        before, word, after = mime_header.group(1, 2, 3)
        coded = '=?' + CHARSET + '?Q?' + mime_encode(word, 1) + '?='
        done = done + rest[:start] + before + coded + after
        rest = rest[start + len(whole):]
266
# Mime-Version header (any value).
mv = regex.compile('^mime-version:', regex.casefold)
# Content-Transfer-Encoding header (any value).
cte = regex.compile('^content-transfer-encoding:', regex.casefold)
# Any character in the range \240-\377, i.e. one that makes a header
# or a body need encoding.
iso_char = regex.compile('[\240-\377]')
270
def mimify_part(ifile, ofile, is_mime):
    '''Convert an 8bit part of a MIME mail message to quoted-printable.

    ifile   -- File object positioned at the first header line of the
               part; only its readline() method is used here.
    ofile   -- object whose write() method receives the result.
    is_mime -- true if a Mime-Version header is known to be present
               already (nested parts are always called with 1).

    The header and the body of the part (up to the first boundary
    line for a multipart message) are read completely before anything
    is written, because the header that has to be written depends on
    the body: the body is encoded as quoted-printable if it contains a
    character in the range \\240-\\377 or a line longer than MAXLEN.
    A body already in quoted-printable is decoded first; a base64 body
    is copied unchanged.  For a multipart message the function then
    calls itself for every nested part and finally copies the text
    after the closing boundary.  Input that ends early, or that has no
    boundary line where one is expected, ends the conversion instead
    of looping forever.'''
    has_cte = is_qp = is_base64 = 0
    multipart = None
    must_quote_body = must_quote_header = has_iso_chars = 0

    header = []
    header_end = ''
    message = []
    message_end = ''
    # read header
    hfile = HeaderFile(ifile)
    while 1:
        line = hfile.readline()
        if not line:
            break
        if not must_quote_header and iso_char.search(line) >= 0:
            must_quote_header = 1
        if mv.match(line) >= 0:
            is_mime = 1
        if cte.match(line) >= 0:
            has_cte = 1
            if qp.match(line) >= 0:
                is_qp = 1
            elif base64_re.match(line) >= 0:
                is_base64 = 1
        if mp.match(line) >= 0:
            # boundary lines consist of '--' followed by the parameter
            multipart = '--' + mp.group(1)
        if he.match(line) >= 0:
            header_end = line
            break
        header.append(line)

    # read body
    while 1:
        line = ifile.readline()
        if not line:
            break
        if multipart:
            if line == multipart + '--\n':
                message_end = line
                break
            if line == multipart + '\n':
                message_end = line
                break
        if is_base64:
            # base64 data is passed through untouched
            message.append(line)
            continue
        if is_qp:
            # a line ending in '=' is continued on the next line
            while line[-2:] == '=\n':
                line = line[:-2]
                newline = ifile.readline()
                if newline[:len(QUOTE)] == QUOTE:
                    newline = newline[len(QUOTE):]
                line = line + newline
            line = mime_decode(line)
        message.append(line)
        if not has_iso_chars:
            if iso_char.search(line) >= 0:
                has_iso_chars = must_quote_body = 1
        if not must_quote_body:
            if len(line) > MAXLEN:
                must_quote_body = 1

    # convert and output header and body
    for line in header:
        if must_quote_header:
            line = mime_encode_header(line)
        if chrset.match(line) >= 0:
            if has_iso_chars:
                # change us-ascii into iso-8859-1
                if string.lower(chrset.group(2)) == 'us-ascii':
                    line = chrset.group(1) + \
                           CHARSET + chrset.group(3)
            else:
                # change iso-8859-* into us-ascii
                line = chrset.group(1) + 'us-ascii' + chrset.group(3)
        if has_cte and cte.match(line) >= 0:
            # rewrite the header to describe the body as it is written
            line = 'Content-Transfer-Encoding: '
            if is_base64:
                line = line + 'base64\n'
            elif must_quote_body:
                line = line + 'quoted-printable\n'
            else:
                line = line + '7bit\n'
        ofile.write(line)
    if (must_quote_header or must_quote_body) and not is_mime:
        ofile.write('Mime-Version: 1.0\n')
        ofile.write('Content-Type: text/plain; ')
        if has_iso_chars:
            ofile.write('charset="%s"\n' % CHARSET)
        else:
            ofile.write('charset="us-ascii"\n')
    if must_quote_body and not has_cte:
        ofile.write('Content-Transfer-Encoding: quoted-printable\n')
    ofile.write(header_end)

    for line in message:
        if must_quote_body:
            line = mime_encode(line, 0)
        ofile.write(line)
    ofile.write(message_end)

    if not multipart:
        return
    # convert the nested parts, one for every separating boundary line
    line = message_end
    while line == multipart + '\n':
        nifile = File(ifile, multipart)
        mimify_part(nifile, ofile, 1)
        # the boundary line that ended the nested part
        line = nifile.peek
        if not line:
            # premature end of file
            return
        ofile.write(line)
    # Either the closing boundary was reached, or there was no boundary
    # line at all: copy whatever is left of the input.
    while 1:
        line = ifile.readline()
        if not line:
            return
        if must_quote_body:
            line = mime_encode(line, 0)
        ofile.write(line)
390 continue
391
def mimify(infile, outfile):
    '''Convert 8bit parts of a MIME mail message to quoted-printable.

    infile  -- name of the input file, or an object with a readline()
               method.
    outfile -- name of the output file, or an object with a write()
               method.

    When infile and outfile are the same file name, the original file
    is first renamed to the same name with a ',' in front of it.
    Files opened here because a name was given are closed again before
    returning, also when the conversion fails; file objects supplied
    by the caller are flushed (the output) but left open.'''
    opened = []         # files opened by this call
    try:
        if type(infile) == type(''):
            ifile = open(infile)
            opened.append(ifile)
            if type(outfile) == type('') and infile == outfile:
                import os
                d, f = os.path.split(infile)
                os.rename(infile, os.path.join(d, ',' + f))
        else:
            ifile = infile
        if type(outfile) == type(''):
            ofile = open(outfile, 'w')
            opened.append(ofile)
        else:
            ofile = outfile
        nifile = File(ifile, None)
        mimify_part(nifile, ofile, 0)
        ofile.flush()
    finally:
        for fp in opened:
            fp.close()
409
410import sys
411if __name__ == '__main__' or (len(sys.argv) > 0 and sys.argv[0] == 'mimify'):
412 import getopt
413 usage = 'Usage: mimify [-l len] -[ed] [infile [outfile]]'
414
Guido van Rossuma3d9e021997-04-11 15:22:56 +0000415 decode_base64 = 0
416 opts, args = getopt.getopt(sys.argv[1:], 'l:edb')
Sjoerd Mullendere8a0a5c1996-02-14 10:40:03 +0000417 if len(args) not in (0, 1, 2):
418 print usage
419 sys.exit(1)
Guido van Rossuma3d9e021997-04-11 15:22:56 +0000420 if (('-e', '') in opts) == (('-d', '') in opts) or \
421 ((('-b', '') in opts) and (('-d', '') not in opts)):
Sjoerd Mullendere8a0a5c1996-02-14 10:40:03 +0000422 print usage
423 sys.exit(1)
424 for o, a in opts:
425 if o == '-e':
426 encode = mimify
427 elif o == '-d':
428 encode = unmimify
429 elif o == '-l':
430 try:
431 MAXLEN = string.atoi(a)
432 except:
433 print usage
434 sys.exit(1)
Guido van Rossuma3d9e021997-04-11 15:22:56 +0000435 elif o == '-b':
436 decode_base64 = 1
Sjoerd Mullendere8a0a5c1996-02-14 10:40:03 +0000437 if len(args) == 0:
Guido van Rossuma3d9e021997-04-11 15:22:56 +0000438 encode_args = (sys.stdin, sys.stdout)
Sjoerd Mullendere8a0a5c1996-02-14 10:40:03 +0000439 elif len(args) == 1:
Guido van Rossuma3d9e021997-04-11 15:22:56 +0000440 encode_args = (args[0], sys.stdout)
Sjoerd Mullendere8a0a5c1996-02-14 10:40:03 +0000441 else:
Guido van Rossuma3d9e021997-04-11 15:22:56 +0000442 encode_args = (args[0], args[1])
443 if decode_base64:
444 encode_args = encode_args + (decode_base64,)
445 apply(encode, encode_args)