blob: 465903f88d37b2988be679268384f21b002e3969 [file] [log] [blame]
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001# Copyright (C) 2001-2007 Python Software Foundation
2# Author: Barry Warsaw
3# Contact: email-sig@python.org
4
5"""Miscellaneous utilities."""
6
7__all__ = [
8 'collapse_rfc2231_value',
9 'decode_params',
10 'decode_rfc2231',
11 'encode_rfc2231',
12 'formataddr',
13 'formatdate',
14 'getaddresses',
15 'make_msgid',
16 'parseaddr',
17 'parsedate',
18 'parsedate_tz',
19 'unquote',
20 ]
21
22import os
23import re
24import time
25import base64
26import random
27import socket
Jeremy Hylton1afc1692008-06-18 20:49:58 +000028import urllib.parse
Guido van Rossum8b3febe2007-08-30 01:15:14 +000029import warnings
30from io import StringIO
31
32from email._parseaddr import quote
33from email._parseaddr import AddressList as _AddressList
34from email._parseaddr import mktime_tz
35
36# We need wormarounds for bugs in these methods in older Pythons (see below)
37from email._parseaddr import parsedate as _parsedate
38from email._parseaddr import parsedate_tz as _parsedate_tz
39
40from quopri import decodestring as _qdecode
41
42# Intrapackage imports
43from email.encoders import _bencode, _qencode
44
# String constants shared by the helpers in this module.
COMMASPACE = ', '      # joins header field values in getaddresses()
EMPTYSTRING = ''       # joins RFC 2231 continuation segments in decode_params()
UEMPTYSTRING = ''
CRLF = '\r\n'
TICK = "'"             # separates charset'language'text in decode_rfc2231()

# Characters whose presence makes formataddr() wrap the real name in quotes.
specialsre = re.compile(r'[][\\()<>@,:;".]')
# Characters that formataddr() prefixes with a backslash in the real name.
escapesre = re.compile(r'[][\\()"]')
53
54
Antoine Pitroufd036452008-08-19 17:56:33 +000055
Guido van Rossum8b3febe2007-08-30 01:15:14 +000056# Helpers
57
def formataddr(pair):
    """Build an address header value from a (realname, email_address) pair.

    This is the inverse of parseaddr(): the result is suitable for an
    RFC 2822 From, To or Cc header.

    When the real name is false (for example the empty string) the email
    address is returned unmodified.  Otherwise the real name is
    backslash-escaped, wrapped in double quotes if it contains any special
    character, and followed by the address in angle brackets.
    """
    realname, addr = pair
    if not realname:
        return addr
    # Quoting is decided on the raw name, before any escaping is applied.
    wrapper = '"' if specialsre.search(realname) else ''
    escaped = escapesre.sub(r'\\\g<0>', realname)
    return '%s%s%s <%s>' % (wrapper, escaped, wrapper, addr)
74
75
Antoine Pitroufd036452008-08-19 17:56:33 +000076
def getaddresses(fieldvalues):
    """Return a list of (REALNAME, EMAIL) for each fieldvalue.

    The field values are joined with ', ' and parsed as a single address
    list.
    """
    combined = COMMASPACE.join(fieldvalues)
    return _AddressList(combined).addresslist
82
83
Antoine Pitroufd036452008-08-19 17:56:33 +000084
# Matches one "=?charset?encoding?atom?=" token and exposes its three parts
# as the named groups 'charset', 'encoding' and 'atom'.  Compiled with
# re.VERBOSE, so the whitespace and '#' comments inside the pattern are not
# part of what is matched; re.IGNORECASE lets the encoding letter be either
# case.
ecre = re.compile(r'''
  =\?                   # literal =?
  (?P<charset>[^?]*?)   # non-greedy up to the next ? is the charset
  \?                    # literal ?
  (?P<encoding>[qb])    # either a "q" or a "b", case insensitive
  \?                    # literal ?
  (?P<atom>.*?)         # non-greedy up to the next ?= is the atom
  \?=                   # literal ?=
  ''', re.VERBOSE | re.IGNORECASE)
94
95
Antoine Pitroufd036452008-08-19 17:56:33 +000096
def formatdate(timeval=None, localtime=False, usegmt=False):
    """Return a date string in RFC 2822 format, for example:

    Fri, 09 Nov 2001 01:08:47 -0000

    timeval   -- floating point time value as accepted by time.gmtime() and
                 time.localtime(); the current time is used when it is None.
    localtime -- when true, timeval is rendered in the local timezone
                 (taking daylight savings time into account) with a numeric
                 UTC offset, instead of in UTC.
    usegmt    -- when true, and localtime is false, the zone is written as
                 the string "GMT" rather than the numeric "-0000".  This is
                 the form HTTP needs.
    """
    # strftime() is deliberately avoided: it honors the locale, whereas
    # RFC 2822 requires the English day and month abbreviations.
    day_names = ('Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun')
    month_names = ('Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                   'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec')
    if timeval is None:
        timeval = time.time()
    if not localtime:
        stamp = time.gmtime(timeval)
        zone = 'GMT' if usegmt else '-0000'
    else:
        stamp = time.localtime(timeval)
        # Use the DST offset only if the local zone defines DST and it is
        # in effect for this timestamp.
        if time.daylight and stamp.tm_isdst:
            seconds_west = time.altzone
        else:
            seconds_west = time.timezone
        hours, leftover = divmod(abs(seconds_west), 3600)
        # The time module counts seconds *west* of UTC while the header
        # states the offset *east* of UTC, hence the inverted sign.
        sign = '-' if seconds_west > 0 else '+'
        zone = '%s%02d%02d' % (sign, hours, leftover // 60)
    return '%s, %02d %s %04d %02d:%02d:%02d %s' % (
        day_names[stamp.tm_wday],
        stamp.tm_mday,
        month_names[stamp.tm_mon - 1],
        stamp.tm_year, stamp.tm_hour, stamp.tm_min, stamp.tm_sec,
        zone)
147
148
Antoine Pitroufd036452008-08-19 17:56:33 +0000149
def make_msgid(idstring=None, domain=None):
    """Return a string suitable for an RFC 2822 compliant Message-ID, e.g.:

    <20020201195627.33539.96671@nightshade.la.mastaler.com>

    The part before the '@' is built from the current UTC timestamp, the
    process id and a random integer below 100000.

    idstring -- optional string appended (after a '.') to the left-hand
                side to strengthen the uniqueness of the message id.
    domain   -- optional string used as the part after the '@'.  When it
                is None (the default) socket.getfqdn() supplies the local
                host name.  Passing a domain skips that lookup.
    """
    utcdate = time.strftime('%Y%m%d%H%M%S', time.gmtime(time.time()))
    pid = os.getpid()
    randint = random.randrange(100000)
    suffix = '' if idstring is None else '.' + idstring
    if domain is None:
        # Only ask the resolver when the caller did not name a host.
        domain = socket.getfqdn()
    return '<%s.%s.%s%s@%s>' % (utcdate, pid, randint, suffix, domain)
169
170
Antoine Pitroufd036452008-08-19 17:56:33 +0000171
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000172# These functions are in the standalone mimelib version only because they've
173# subsequently been fixed in the latest Python versions. We use this to worm
174# around broken older Pythons.
def parsedate(data):
    """Return None for empty data, else the result of _parsedate(data).

    The emptiness guard exists so that a false value (None, '') never
    reaches the underlying email._parseaddr parser.
    """
    return _parsedate(data) if data else None
179
180
def parsedate_tz(data):
    """Return None for empty data, else the result of _parsedate_tz(data).

    The emptiness guard exists so that a false value (None, '') never
    reaches the underlying email._parseaddr parser.
    """
    return _parsedate_tz(data) if data else None
185
186
def parseaddr(addr):
    """Parse addr and return the first address found in it.

    The result is the first entry of the parsed address list, or the pair
    ('', '') when parsing yields no addresses.
    """
    parsed = _AddressList(addr).addresslist
    return parsed[0] if parsed else ('', '')
192
193
194# rfc822.unquote() doesn't properly de-backslash-ify in Python pre-2.3.
def unquote(str):
    """Remove quotes from a string.

    A string enclosed in double quotes loses them and has the escapes
    backslash-backslash and backslash-quote collapsed to a single backslash
    and a quote respectively.  A string enclosed in angle brackets just
    loses the brackets.  Anything else, including strings shorter than two
    characters, is returned unchanged.
    """
    if len(str) <= 1:
        return str
    first, last = str[0], str[-1]
    if first == '"' and last == '"':
        inner = str[1:-1]
        # Order matters: escaped backslashes are collapsed before quotes.
        inner = inner.replace('\\\\', '\\')
        return inner.replace('\\"', '"')
    if first == '<' and last == '>':
        return str[1:-1]
    return str
203
204
Antoine Pitroufd036452008-08-19 17:56:33 +0000205
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000206# RFC2231-related functions - parameter encoding and decoding
def decode_rfc2231(s):
    """Decode string according to RFC 2231.

    Splits s on the first two single quotes into charset, language and
    text.  When s holds fewer than two single quotes it cannot be split
    that way, and the tuple (None, None, s) is returned instead.
    """
    pieces = s.split(TICK, 2)
    # With maxsplit=2 a well-formed value always yields exactly three pieces.
    return pieces if len(pieces) > 2 else (None, None, s)
213
214
def encode_rfc2231(s, charset=None, language=None):
    """Encode string according to RFC 2231.

    s is always percent-encoded with an empty set of "safe" characters,
    using charset (or ASCII when charset is None) to turn it into octets.

    If neither charset nor language is given, the percent-encoded string is
    returned without any prefix.  Otherwise the result has the form

        charset'language'percent-encoded-text

    where a charset or language that was not given is written as the empty
    string.
    """
    encoded = urllib.parse.quote(s, safe='', encoding=charset or 'ascii')
    if charset is None and language is None:
        return encoded
    # RFC 2231 allows either field to be blank.  Substitute '' for None so
    # the literal text "None" never ends up in the encoded value.
    return "%s'%s'%s" % (charset or '', language or '', encoded)
228
229
# Matches RFC 2231 style parameter names: "name*", "name*N" and "name*N*",
# where N is a run of ASCII digits.  Group 'name' is the bare parameter name
# and group 'num' is the digit string (None for the plain "name*" form).
# re.ASCII restricts \w to ASCII word characters.
rfc2231_continuation = re.compile(r'^(?P<name>\w+)\*((?P<num>[0-9]+)\*?)?$',
    re.ASCII)
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000232
def decode_params(params):
    """Decode parameters list according to RFC 2231.

    params is a sequence of 2-tuples containing (param name, string value).
    It is not modified.

    Returns a new list.  The first pair is copied through unchanged.  Each
    later ordinary parameter is unquoted and re-emitted as (name, '"value"').
    Parameters split into RFC 2231 segments (name*, name*0, name*1*, ...)
    are reassembled in numerical order and appended after the ordinary
    ones.  If any segment name ends in '*' the value is a 3-tuple
    (charset, language, '"text"'); otherwise it is the string '"text"'.

    Raises IndexError if params is empty.
    """
    first_name, first_value = params[0]
    new_params = [(first_name, first_value)]
    # Map parameter's name to a list of continuations.  The values are a
    # 3-tuple of the continuation number, the string value, and a flag
    # specifying whether a particular segment is %-encoded.
    rfc2231_params = {}
    for name, value in params[1:]:
        encoded = name.endswith('*')
        value = unquote(value)
        mo = rfc2231_continuation.match(name)
        if mo is None:
            new_params.append((name, '"%s"' % quote(value)))
            continue
        name, num = mo.group('name', 'num')
        if num is not None:
            num = int(num)
        rfc2231_params.setdefault(name, []).append((num, value, encoded))

    for name, continuations in rfc2231_params.items():
        # Sort by segment number.  An unnumbered "name*" segment has
        # num None, which cannot be compared with an int, so the key puts
        # such segments first instead of letting a malformed mix of
        # "name*" and "name*0" raise TypeError.  Ties are broken by value
        # and flag, exactly as a plain tuple sort would.
        continuations.sort(
            key=lambda seg: (seg[0] is not None, seg[0] or 0, seg[1], seg[2]))
        segments = []
        extended = False
        # Append all values in numerical order, converting %-encodings for
        # the encoded segments.  If any of the continuation names ends in
        # a *, then the entire string, after decoding segments and
        # concatenating, must have the charset and language specifiers at
        # the beginning of the string.
        for num, s, encoded in continuations:
            if encoded:
                # Decode as "latin-1", so the characters in s directly
                # represent the percent-encoded octet values.
                # collapse_rfc2231_value treats this as an octet sequence.
                s = urllib.parse.unquote(s, encoding="latin-1")
                extended = True
            segments.append(s)
        value = quote(EMPTYSTRING.join(segments))
        if extended:
            charset, language, value = decode_rfc2231(value)
            new_params.append((name, (charset, language, '"%s"' % value)))
        else:
            new_params.append((name, '"%s"' % value))
    return new_params
288
def collapse_rfc2231_value(value, errors='replace',
                           fallback_charset='us-ascii'):
    """Collapse a parameter value from decode_params() into a plain string.

    value            -- either a string, or a (charset, language, text)
                        3-tuple as built by decode_params() for extended
                        parameters.
    errors           -- error handler passed to the decoder.
    fallback_charset -- charset used when the tuple's charset is None,
                        i.e. when the value carried no charset'language'
                        prefix.

    Anything that is not a 3-tuple is simply unquoted.  For a 3-tuple the
    text is turned back into bytes and decoded with the charset.  If the
    charset names no known codec, the unquoted text is returned instead.
    """
    if not isinstance(value, tuple) or len(value) != 3:
        return unquote(value)
    charset, _language, text = value
    if charset is None:
        # decode_rfc2231() could not find a charset; without this the
        # decode below would raise TypeError on the None encoding.
        charset = fallback_charset
    # While text comes to us as a unicode string, we need it to be a bytes
    # object.  We do not want the normal utf-8 encoder here; we want a
    # straight interpretation of the string's characters as bytes.
    rawbytes = bytes(text, 'raw-unicode-escape')
    try:
        return str(rawbytes, charset, errors)
    except LookupError:
        # charset is not a known codec.
        return unquote(text)