blob: 0439aff938f973fe0886c47644a738292fb9b0df [file] [log] [blame]
# Copyright (C) 2001-2007 Python Software Foundation
# Author: Barry Warsaw
# Contact: email-sig@python.org

"""Miscellaneous utilities."""
6
# Names exported by ``from <this module> import *``.
__all__ = [
    'collapse_rfc2231_value',
    'decode_params',
    'decode_rfc2231',
    'encode_rfc2231',
    'formataddr',
    'formatdate',
    'getaddresses',
    'make_msgid',
    'parseaddr',
    'parsedate',
    'parsedate_tz',
    'unquote',
    ]
21
22import os
23import re
24import time
25import base64
26import random
27import socket
Guido van Rossum8b3febe2007-08-30 01:15:14 +000028import warnings
29from io import StringIO
30
31from email._parseaddr import quote
32from email._parseaddr import AddressList as _AddressList
33from email._parseaddr import mktime_tz
34
# We need workarounds for bugs in these methods in older Pythons (see below)
36from email._parseaddr import parsedate as _parsedate
37from email._parseaddr import parsedate_tz as _parsedate_tz
38
39from quopri import decodestring as _qdecode
40
41# Intrapackage imports
42from email.encoders import _bencode, _qencode
43
# Shared string constants used by the helpers in this module.
COMMASPACE = ', '     # separator used when joining several address fields
EMPTYSTRING = ''
UEMPTYSTRING = ''
CRLF = '\r\n'
TICK = "'"            # delimiter between charset, language and value (RFC 2231)
49
# Characters whose presence forces the real name to be wrapped in double
# quotes by formataddr().
specialsre = re.compile(r'[][\\()<>@,:;".]')
# Characters that formataddr() backslash-escapes inside the real name.
escapesre = re.compile(r'[][\\()"]')



# Helpers

def formataddr(pair):
    """The inverse of parseaddr(), this takes a 2-tuple of the form
    (realname, email_address) and returns the string value suitable
    for an RFC 2822 From, To or Cc header.

    If the first element of pair is false, then the second element is
    returned unmodified.
    """
    name, address = pair
    if not name:
        return address
    # Quote the real name only when it contains a special character; the
    # escaping below is applied either way.
    quotes = '"' if specialsre.search(name) else ''
    name = escapesre.sub(r'\\\g<0>', name)
    return '%s%s%s <%s>' % (quotes, name, quotes, address)
73
74
75
def getaddresses(fieldvalues):
    """Return a list of (REALNAME, EMAIL) for each fieldvalue.

    fieldvalues is an iterable of header value strings.  They are joined
    with ', ' and parsed in one pass, so the result holds one 2-tuple per
    address found across all of the values.
    """
    # The local is deliberately not called ``all`` so that it does not
    # shadow the builtin.
    joined = COMMASPACE.join(fieldvalues)
    return _AddressList(joined).addresslist
81
82
83
# Matches one encoded word of the form =?charset?encoding?atom?= and exposes
# its three parts as named groups.
ecre = re.compile(r'''
  =\?                   # literal =?
  (?P<charset>[^?]*?)   # non-greedy up to the next ? is the charset
  \?                    # literal ?
  (?P<encoding>[qb])    # either a "q" or a "b", case insensitive
  \?                    # literal ?
  (?P<atom>.*?)         # non-greedy up to the next ?= is the atom
  \?=                   # literal ?=
  ''', re.VERBOSE | re.IGNORECASE)
93
94
95
def formatdate(timeval=None, localtime=False, usegmt=False):
    """Returns a date string as specified by RFC 2822, e.g.:

    Fri, 09 Nov 2001 01:08:47 -0000

    Optional timeval if given is a floating point time value as accepted by
    gmtime() and localtime(), otherwise the current time is used.

    Optional localtime is a flag that when True, interprets timeval, and
    returns a date relative to the local timezone instead of UTC, properly
    taking daylight savings time into account.

    Optional argument usegmt means that the timezone is written out as
    an ascii string, not numeric one (so "GMT" instead of "-0000").  This
    is needed for HTTP, and is only used when localtime==False.
    """
    # Note: we cannot use strftime() because that honors the locale and RFC
    # 2822 requires that day and month names be the English abbreviations.
    if timeval is None:
        timeval = time.time()
    if localtime:
        now = time.localtime(timeval)
        # Calculate timezone offset, based on whether the local zone has
        # daylight savings time, and whether DST is in effect.
        if time.daylight and now[-1]:
            offset = time.altzone
        else:
            offset = time.timezone
        # divmod() yields whole hours plus the leftover *seconds*; those
        # are converted to minutes when the zone string is built.
        hours, leftover_seconds = divmod(abs(offset), 3600)
        # Remember offset is in seconds west of UTC, but the zone string is
        # expressed east of UTC, so the signs differ.
        sign = '-' if offset > 0 else '+'
        zone = '%s%02d%02d' % (sign, hours, leftover_seconds // 60)
    else:
        now = time.gmtime(timeval)
        # Timezone offset is always -0000
        zone = 'GMT' if usegmt else '-0000'
    return '%s, %02d %s %04d %02d:%02d:%02d %s' % (
        ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun'][now[6]],
        now[2],
        ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
         'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'][now[1] - 1],
        now[0], now[3], now[4], now[5],
        zone)
146
147
148
def make_msgid(idstring=None, domain=None):
    """Returns a string suitable for RFC 2822 compliant Message-ID, e.g:

    <20020201195627.33539.96671@nightshade.la.mastaler.com>

    Optional idstring if given is a string used to strengthen the
    uniqueness of the message id.

    Optional domain if given provides the portion of the message id after
    the '@'.  It defaults to the fully qualified name of the local host as
    reported by socket.getfqdn(); passing it explicitly avoids that lookup.
    """
    timeval = time.time()
    utcdate = time.strftime('%Y%m%d%H%M%S', time.gmtime(timeval))
    pid = os.getpid()
    randint = random.randrange(100000)
    idstring = '' if idstring is None else '.' + idstring
    if domain is None:
        domain = socket.getfqdn()
    return '<%s.%s.%s%s@%s>' % (utcdate, pid, randint, idstring, domain)
168
169
170
# These functions are in the standalone mimelib version only because they've
# subsequently been fixed in the latest Python versions.  We use this to work
# around broken older Pythons.
def parsedate(data):
    """Parse a date string with email._parseaddr.parsedate().

    Returns None without calling the parser when data is false (None or
    the empty string); otherwise returns whatever the parser returns.
    """
    if not data:
        return None
    return _parsedate(data)
178
179
def parsedate_tz(data):
    """Parse a date string with email._parseaddr.parsedate_tz().

    Returns None without calling the parser when data is false (None or
    the empty string); otherwise returns whatever the parser returns.
    """
    if not data:
        return None
    return _parsedate_tz(data)
184
185
def parseaddr(addr):
    """Parse addr and return its first address as a (realname, email) pair.

    Returns ('', '') when no address can be extracted from addr.
    """
    addrs = _AddressList(addr).addresslist
    if not addrs:
        return '', ''
    return addrs[0]
191
192
# rfc822.unquote() doesn't properly de-backslash-ify in Python pre-2.3.
def unquote(str):
    """Remove quotes from a string.

    A string wrapped in double quotes has them stripped and its escaped
    backslashes and escaped double quotes restored.  A string wrapped in
    angle brackets just has the brackets stripped.  Anything else, and any
    string shorter than two characters, is returned unchanged.
    """
    # NOTE: the parameter name shadows the builtin but is kept so that
    # callers passing it by keyword keep working.
    if len(str) > 1:
        if str.startswith('"') and str.endswith('"'):
            # Collapse doubled backslashes first, then unescape quotes.
            return str[1:-1].replace('\\\\', '\\').replace('\\"', '"')
        if str.startswith('<') and str.endswith('>'):
            return str[1:-1]
    return str
202
203
204
# RFC2231-related functions - parameter encoding and decoding
def decode_rfc2231(s):
    """Decode string according to RFC 2231.

    s is expected to look like charset'language'value.  When it contains
    at least two ticks, the three parts are returned as a list
    [charset, language, value]; the value keeps any further ticks.
    Otherwise the tuple (None, None, s) is returned.
    """
    parts = s.split(TICK, 2)
    if len(parts) <= 2:
        # Fewer than two ticks: no charset/language prefix to split off.
        return None, None, s
    return parts
212
213
def encode_rfc2231(s, charset=None, language=None):
    """Encode string according to RFC 2231.

    s is always percent-quoted (no character is treated as safe).  If
    neither charset nor language is given, the quoted string is returned
    on its own.  If charset is given but not language, the string is
    encoded using the empty string for language.  Otherwise the result has
    the form charset'language'quoted-s.

    When charset is given, it is also the encoding used to turn s into the
    octets that get percent-quoted, so the octets agree with the declared
    charset; without a charset, quote()'s default (UTF-8) is used.
    """
    # Python 3 moved quote() from urllib to urllib.parse.
    import urllib.parse
    s = urllib.parse.quote(s, safe='', encoding=charset)
    if charset is None and language is None:
        return s
    if language is None:
        language = ''
    return "%s'%s'%s" % (charset, language, s)
228
229
# Matches RFC 2231 parameter names such as name*, name*0 and name*0*,
# capturing the base name and the optional continuation number.
rfc2231_continuation = re.compile(r'^(?P<name>\w+)\*((?P<num>[0-9]+)\*?)?$')
231
def decode_params(params):
    """Decode parameters list according to RFC 2231.

    params is a sequence of 2-tuples containing (param name, string value).

    The first entry is passed through unchanged.  Every later entry whose
    name is not an RFC 2231 continuation is unquoted and re-quoted.
    Continuation segments are grouped by base name, concatenated in
    numerical order and appended after the ordinary parameters: as a quoted
    string when no segment was %-encoded, or as a 3-tuple
    (charset, language, quoted value) when at least one was.

    A new list is returned; params itself is not modified.  Raises
    IndexError when params is empty.
    """
    # Python 3 moved unquote() from urllib to urllib.parse.
    import urllib.parse
    first_name, first_value = params[0]
    new_params = [(first_name, first_value)]
    # Map parameter's name to a list of continuations.  The values are a
    # 3-tuple of the continuation number, the string value, and a flag
    # specifying whether a particular segment is %-encoded.
    rfc2231_params = {}
    for name, value in params[1:]:
        encoded = name.endswith('*')
        value = unquote(value)
        mo = rfc2231_continuation.match(name)
        if mo:
            name, num = mo.group('name', 'num')
            if num is not None:
                num = int(num)
            rfc2231_params.setdefault(name, []).append((num, value, encoded))
        else:
            new_params.append((name, '"%s"' % quote(value)))
    for name, continuations in rfc2231_params.items():
        segments = []
        extended = False
        # Sort by number
        continuations.sort()
        # And now append all values in numerical order, converting
        # %-encodings for the encoded segments.  If any of the
        # continuation names ends in a *, then the entire string, after
        # decoding segments and concatenating, must have the charset and
        # language specifiers at the beginning of the string.
        for num, s, encoded in continuations:
            if encoded:
                # Decode as latin-1 so that each character of the result
                # stands for exactly one percent-encoded octet; the real
                # charset is only known once all segments are joined.
                s = urllib.parse.unquote(s, encoding='latin-1')
                extended = True
            segments.append(s)
        value = quote(EMPTYSTRING.join(segments))
        if extended:
            charset, language, value = decode_rfc2231(value)
            new_params.append((name, (charset, language, '"%s"' % value)))
        else:
            new_params.append((name, '"%s"' % value))
    return new_params
285
def collapse_rfc2231_value(value, errors='replace',
                           fallback_charset='us-ascii'):
    """Collapse a possibly RFC 2231 encoded parameter value into a string.

    value is either a plain string, which is returned unquoted, or a
    3-tuple (charset, language, text).  For a 3-tuple the text is decoded
    with charset, using errors as the codec error handler.  When charset is
    None, fallback_charset is used instead.  When the charset names no
    known codec, the unquoted text is returned undecoded.
    """
    if not isinstance(value, tuple) or len(value) != 3:
        return unquote(value)
    # While value comes to us as a unicode string, we need it to be a bytes
    # object.  We do not want bytes() normal utf-8 decoder, we want a straight
    # interpretation of the string as character bytes.
    charset, language, text = value
    if charset is None:
        # No charset could be determined for this value; without this
        # fallback str() below would raise TypeError rather than LookupError.
        charset = fallback_charset
    rawbytes = bytes(text, 'raw-unicode-escape')
    try:
        return str(rawbytes, charset, errors)
    except LookupError:
        # charset is not a known codec.
        return unquote(text)