blob: 6bbf2d35cb14657630f08ffe8ba10c1bc7e409ab [file] [log] [blame]
# Copyright (C) 2001 Python Software Foundation
# Author: barry@zope.com (Barry Warsaw)
3
4"""Miscellaneous utilities.
5"""
6
7import re
8
9from rfc822 import unquote, quote, parseaddr
10from rfc822 import dump_address_pair
11from rfc822 import AddrlistClass as _AddrlistClass
12from rfc822 import parsedate_tz, parsedate, mktime_tz, formatdate
13
14from quopri import decodestring as _qdecode
15import base64
16
17# Intrapackage imports
18from Encoders import _bencode, _qencode
19
20COMMASPACE = ', '
21UEMPTYSTRING = u''
22
23
Barry Warsawe968ead2001-10-04 17:05:11 +000024
# Helpers
26
27def _identity(s):
28 return s
29
30
31def _bdecode(s):
32 if not s:
33 return s
34 # We can't quite use base64.encodestring() since it tacks on a "courtesy
35 # newline". Blech!
36 if not s:
37 return s
38 hasnewline = (s[-1] == '\n')
39 value = base64.decodestring(s)
40 if not hasnewline and value[-1] == '\n':
41 return value[:-1]
42 return value
43
44
Barry Warsawe968ead2001-10-04 17:05:11 +000045
def getaddresses(fieldvalues):
    """Return a list of (REALNAME, EMAIL) for the addresses in fieldvalues.

    `fieldvalues' is a sequence of strings.  They are joined with ', ' into
    a single string, which is then parsed as one address list.
    """
    return _AddrlistClass(COMMASPACE.join(fieldvalues)).getaddrlist()
51
52
Barry Warsawe968ead2001-10-04 17:05:11 +000053
# Matches one RFC 2047 encoded-word of the form =?charset?encoding?atom?= and
# exposes its three fields as the named groups `charset', `encoding' and
# `atom'.  Because the pattern contains three groups, ecre.split() returns
# four items (leading text, charset, encoding, atom) per match, plus the
# trailing text.
ecre = re.compile(r'''
  =\?                   # literal =?
  (?P<charset>[^?]*?)   # non-greedy up to the next ? is the charset
  \?                    # literal ?
  (?P<encoding>[qb])    # either a "q" or a "b", case insensitive
  \?                    # literal ?
  (?P<atom>.*?)         # non-greedy up to the next ?= is the atom
  \?=                   # literal ?=
  ''', re.IGNORECASE | re.VERBOSE)
63
64
def decode(s):
    """Return a decoded string according to RFC 2047, as a unicode string.

    Every encoded-word of the form =?charset?encoding?atom?= found in `s' is
    decoded and converted to unicode using its declared charset.  The
    encoding is matched case-insensitively: `q' atoms are decoded with
    _qdecode and `b' atoms with _bdecode.  Text outside of encoded-words is
    passed through as-is.

    Errors raised by the decoders or by the unicode conversion (for example
    an unknown charset) propagate to the caller.
    """
    # Split on every encoded-word at once.  Since ecre has three groups, the
    # result is a flat list laid out as
    #   [text, charset, encoding, atom, text, charset, encoding, atom, text]
    # i.e. 4*N + 1 items for N encoded-words.  Splitting everything up front
    # guarantees that no encoded-word after the second one is dropped.
    parts = ecre.split(s)
    # The first element is any non-encoded leading text
    rtn = [parts[0]]
    for i in range(1, len(parts), 4):
        charset, encoding, atom, unencoded = parts[i:i+4]
        # The encoding must be either `q' or `b', case-insensitive
        encoding = encoding.lower()
        if encoding == 'q':
            func = _qdecode
        elif encoding == 'b':
            func = _bdecode
        else:
            # Not reachable while ecre only accepts [qb]; kept as a harmless
            # fallback should the pattern ever be widened.
            func = _identity
        # Decode and get the unicode in the charset
        rtn.append(unicode(func(atom), charset))
        # The plain text between this encoded-word and the next one (or the
        # end of the string)
        rtn.append(unencoded)
    # Now that we've decoded everything, we just need to join all the parts
    # together into the final string.
    return UEMPTYSTRING.join(rtn)
93
94
Barry Warsawe968ead2001-10-04 17:05:11 +000095
def encode(s, charset='iso-8859-1', encoding='q'):
    """Encode a string according to RFC 2047.

    `encoding' selects the encoder, case-insensitively: `q' encodes `s' with
    _qencode and `b' encodes it with _bencode.  Any other value raises
    ValueError.

    The result has the form =?charset?encoding?text?= where both the charset
    and the encoding code are lower-cased.
    """
    code = encoding.lower()
    if code not in ('q', 'b'):
        raise ValueError('Illegal encoding code: ' + encoding)
    if code == 'q':
        estr = _qencode(s)
    else:
        estr = _bencode(s)
    return '=?%s?%s?%s?=' % (charset.lower(), code, estr)