blob: 6b6d7f4474479ef303b55131e1ee00deca82f605 [file] [log] [blame]
Benjamin Peterson46a99002010-01-09 18:45:30 +00001# Copyright (C) 2001-2010 Python Software Foundation
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002# Author: Barry Warsaw
3# Contact: email-sig@python.org
4
5"""Miscellaneous utilities."""
6
# Names exported by ``from email.utils import *``.
__all__ = [
    'collapse_rfc2231_value',
    'decode_params',
    'decode_rfc2231',
    'encode_rfc2231',
    'formataddr',
    'formatdate',
    'format_datetime',
    'getaddresses',
    'make_msgid',
    'mktime_tz',
    'parseaddr',
    'parsedate',
    'parsedate_tz',
    'parsedate_to_datetime',
    'unquote',
]
24
25import os
26import re
27import time
28import base64
29import random
30import socket
R David Murray875048b2011-07-20 11:41:21 -040031import datetime
Jeremy Hylton1afc1692008-06-18 20:49:58 +000032import urllib.parse
Guido van Rossum8b3febe2007-08-30 01:15:14 +000033import warnings
34from io import StringIO
35
36from email._parseaddr import quote
37from email._parseaddr import AddressList as _AddressList
38from email._parseaddr import mktime_tz
39
Georg Brandl1aca31e2012-09-22 09:03:56 +020040from email._parseaddr import parsedate, parsedate_tz, _parsedate_tz
Guido van Rossum8b3febe2007-08-30 01:15:14 +000041
42from quopri import decodestring as _qdecode
43
44# Intrapackage imports
45from email.encoders import _bencode, _qencode
R David Murray8debacb2011-04-06 09:35:57 -040046from email.charset import Charset
Guido van Rossum8b3febe2007-08-30 01:15:14 +000047
# Small string constants shared by the helpers below.
COMMASPACE = ', '
EMPTYSTRING = ''
UEMPTYSTRING = ''
CRLF = '\r\n'
TICK = "'"

# Characters whose presence makes formataddr() wrap the display name in
# double quotes.
specialsre = re.compile(r'[][\\()<>@,:;".]')
# Characters that formataddr() backslash-escapes inside the display name.
escapesre = re.compile(r'[\\"]')
Guido van Rossum8b3febe2007-08-30 01:15:14 +000056
# How to figure out if we are processing strings that come from a byte
# source with undecodable characters.  The pattern finds a code point in
# U+DC00-U+DFFF that is not preceded by one in U+D800-U+DBFF and not
# followed by another one in U+DC00-U+DFFF.
#
# The \A and \Z anchors are spelled with doubled backslashes because this is
# not a raw string literal: a bare '\A' or '\Z' is an invalid string escape
# that Python only tolerates with a warning.  The resulting pattern text is
# unchanged.
_has_surrogates = re.compile(
    '([^\ud800-\udbff]|\\A)[\udc00-\udfff]([^\udc00-\udfff]|\\Z)').search
Guido van Rossum8b3febe2007-08-30 01:15:14 +000061
# How to deal with a string containing bytes before handing it to the
# application through the 'normal' interface.
def _sanitize(string):
    """Return *string* with surrogate-escaped bytes turned into U+FFFD.

    Code points U+DC80-U+DCFF (the range the 'surrogateescape' error handler
    uses for undecodable bytes) are replaced by the unicode 'unknown'
    character; every other character is returned unchanged.
    """
    try:
        # Fast path: works whenever the string holds only ASCII characters
        # and escaped bytes.
        original_bytes = string.encode('ascii', 'surrogateescape')
    except UnicodeEncodeError:
        # The string also contains characters that ASCII cannot represent
        # (for example a legitimately decoded 'é').  Leave those alone and
        # replace only the escaped bytes instead of failing.
        return ''.join(
            '\ufffd' if '\udc80' <= char <= '\udcff' else char
            for char in string)
    return original_bytes.decode('ascii', 'replace')
68
Antoine Pitroufd036452008-08-19 17:56:33 +000069
Guido van Rossum8b3febe2007-08-30 01:15:14 +000070# Helpers
71
def formataddr(pair, charset='utf-8'):
    """Build an address header value from a (realname, email_address) pair.

    This is the inverse of parseaddr(): the returned string is suitable for
    an RFC 2822 From, To or Cc header.

    If the first element of pair is false, then the second element is
    returned unmodified.

    Optional charset if given is the character set that is used to encode
    realname in case realname is not ASCII safe.  Can be an instance of str or
    a Charset-like object which has a header_encode method.  Default is
    'utf-8'.

    Raises UnicodeError if email_address is not ASCII.
    """
    realname, addr_spec = pair
    # The address MUST (per RFC) be ascii, so throw a UnicodeError if it isn't.
    addr_spec.encode('ascii')
    if not realname:
        return addr_spec
    try:
        realname.encode('ascii')
    except UnicodeEncodeError:
        # Non-ASCII display name: let the charset produce the encoded form.
        encoder = Charset(charset) if isinstance(charset, str) else charset
        return "%s <%s>" % (encoder.header_encode(realname), addr_spec)
    # ASCII display name: quote it when it contains specials, and always
    # backslash-escape backslashes and double quotes.
    quote_char = '"' if specialsre.search(realname) else ''
    escaped = escapesre.sub(r'\\\g<0>', realname)
    return '%s%s%s <%s>' % (quote_char, escaped, quote_char, addr_spec)
103
104
Antoine Pitroufd036452008-08-19 17:56:33 +0000105
def getaddresses(fieldvalues):
    """Return a list of (REALNAME, EMAIL) for each fieldvalue."""
    # All field values are parsed together as one comma-separated list.
    joined = COMMASPACE.join(fieldvalues)
    return _AddressList(joined).addresslist
111
112
Antoine Pitroufd036452008-08-19 17:56:33 +0000113
# Matches an encoded word of the form =?charset?encoding?atom?= and exposes
# its three parts as named groups.
ecre = re.compile(
    r'''
    =\?                   # literal =?
    (?P<charset>[^?]*?)   # non-greedy up to the next ? is the charset
    \?                    # literal ?
    (?P<encoding>[qb])    # either a "q" or a "b", case insensitive
    \?                    # literal ?
    (?P<atom>.*?)         # non-greedy up to the next ?= is the atom
    \?=                   # literal ?=
    ''',
    re.VERBOSE | re.IGNORECASE)
123
124
def _format_timetuple_and_zone(timetuple, zone):
    """Render a time tuple plus a zone string as 'Fri, 09 Nov 2001 01:08:47 -0000'.

    timetuple is indexed like a time.struct_time: (year, month, day, hour,
    minute, second, weekday, ...), with month counted from 1 and weekday
    counted from 0 for Monday.  zone is appended verbatim.
    """
    # English names are hard-coded so that the output never depends on the
    # current locale.
    day_names = ('Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun')
    month_names = ('Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                   'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec')
    weekday = day_names[timetuple[6]]
    day = timetuple[2]
    month = month_names[timetuple[1] - 1]
    year = timetuple[0]
    hour, minute, second = timetuple[3], timetuple[4], timetuple[5]
    return '%s, %02d %s %04d %02d:%02d:%02d %s' % (
        weekday, day, month, year, hour, minute, second, zone)
Antoine Pitroufd036452008-08-19 17:56:33 +0000133
def formatdate(timeval=None, localtime=False, usegmt=False):
    """Returns a date string as specified by RFC 2822, e.g.:

    Fri, 09 Nov 2001 01:08:47 -0000

    Optional timeval if given is a floating point time value as accepted by
    gmtime() and localtime(), otherwise the current time is used.

    Optional localtime is a flag that when True, interprets timeval, and
    returns a date relative to the local timezone instead of UTC, properly
    taking daylight savings time into account.

    Optional argument usegmt means that the timezone is written out as
    an ascii string, not numeric one (so "GMT" instead of "+0000"). This
    is needed for HTTP, and is only used when localtime==False.
    """
    # Note: we cannot use strftime() because that honors the locale and RFC
    # 2822 requires that day and month names be the English abbreviations.
    if timeval is None:
        timeval = time.time()
    if not localtime:
        # Timezone offset is always -0000
        zone = 'GMT' if usegmt else '-0000'
        return _format_timetuple_and_zone(time.gmtime(timeval), zone)
    now = time.localtime(timeval)
    # Prefer the UTC offset (seconds east of UTC) that the platform computed
    # for this particular timestamp.  The module-level time.timezone and
    # time.altzone only describe the zone's current rules and are wrong for
    # timestamps from a period when the local offset was different.
    gmtoff = getattr(now, 'tm_gmtoff', None)
    if gmtoff is None:
        # Fall back to the process-wide constants, which are expressed in
        # seconds *west* of UTC, hence the negation.
        if time.daylight and now[-1]:
            gmtoff = -time.altzone
        else:
            gmtoff = -time.timezone
    sign = '-' if gmtoff < 0 else '+'
    hours, leftover_seconds = divmod(abs(gmtoff), 3600)
    zone = '%s%02d%02d' % (sign, hours, leftover_seconds // 60)
    return _format_timetuple_and_zone(now, zone)
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000178
def format_datetime(dt, usegmt=False):
    """Turn a datetime into a date string as specified in RFC 2822.

    If usegmt is True, dt must be an aware datetime with an offset of zero.  In
    this case 'GMT' will be rendered instead of the normal +0000 required by
    RFC2822.  This is to support HTTP headers involving date stamps.

    A naive dt is rendered with the zone '-0000'.  Raises ValueError if
    usegmt is true and dt's tzinfo is not datetime.timezone.utc.
    """
    timetuple = dt.timetuple()
    tzinfo = dt.tzinfo
    if not usegmt:
        zone = '-0000' if tzinfo is None else dt.strftime("%z")
    elif tzinfo is None or tzinfo != datetime.timezone.utc:
        raise ValueError("usegmt option requires a UTC datetime")
    else:
        zone = 'GMT'
    return _format_timetuple_and_zone(timetuple, zone)
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000196
Antoine Pitroufd036452008-08-19 17:56:33 +0000197
def make_msgid(idstring=None, domain=None):
    """Returns a string suitable for RFC 2822 compliant Message-ID, e.g:

    <20020201195627.33539.9667112345678901234@nightshade.la.mastaler.com>

    The id is built from the current UTC time (to the second), the process
    id and a random number, in that order.

    Optional idstring if given is a string used to strengthen the
    uniqueness of the message id.  Optional domain if given provides the
    portion of the message id after the '@'.  It defaults to the locally
    defined hostname.
    """
    utcdate = time.strftime('%Y%m%d%H%M%S', time.gmtime(time.time()))
    pid = os.getpid()
    # The timestamp only has one-second resolution, so the random component
    # is all that separates ids generated by one process within the same
    # second.  Use 64 random bits: a range of only 100000 values collides
    # readily once a few hundred ids are produced in a burst.
    randint = random.getrandbits(64)
    idstring = '' if idstring is None else '.' + idstring
    if domain is None:
        domain = socket.getfqdn()
    return '<%s.%s.%s%s@%s>' % (utcdate, pid, randint, idstring, domain)
220
221
def parsedate_to_datetime(data):
    """Parse a date string into a datetime.datetime.

    The value is parsed with _parsedate_tz().  When the parsed timezone
    offset is None the result is a naive datetime; otherwise it is an aware
    datetime whose tzinfo is a fixed-offset datetime.timezone.
    """
    # NOTE(review): if _parsedate_tz() returns None for unparseable input (as
    # the public parsedate_tz() is documented to), the unpacking below raises
    # TypeError -- confirm whether callers rely on that.
    *fields, offset = _parsedate_tz(data)
    # Only year..second are used; the remaining tuple members are ignored.
    moment = fields[:6]
    if offset is None:
        return datetime.datetime(*moment)
    tzinfo = datetime.timezone(datetime.timedelta(seconds=offset))
    return datetime.datetime(*moment, tzinfo=tzinfo)
228
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000229
def parseaddr(addr):
    """Return the first address parsed from addr as a 2-tuple.

    The tuple is the first entry of the parsed address list; ('', '') is
    returned when no address could be parsed.
    """
    parsed = _AddressList(addr).addresslist
    return parsed[0] if parsed else ('', '')
235
236
# rfc822.unquote() doesn't properly de-backslash-ify in Python pre-2.3.
def unquote(str):
    """Remove quotes from a string.

    A string wrapped in double quotes loses them and has the escapes \\\\
    and \\" undone; a string wrapped in angle brackets just loses the
    brackets.  Anything else, including strings shorter than two characters,
    is returned unchanged.
    """
    # The parameter name shadows the builtin but is part of the public
    # signature, so it is kept.
    if len(str) <= 1:
        return str
    if str.startswith('"') and str.endswith('"'):
        inner = str[1:-1]
        return inner.replace('\\\\', '\\').replace('\\"', '"')
    if str.startswith('<') and str.endswith('>'):
        return str[1:-1]
    return str
246
247
Antoine Pitroufd036452008-08-19 17:56:33 +0000248
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000249# RFC2231-related functions - parameter encoding and decoding
def decode_rfc2231(s):
    """Decode string according to RFC 2231.

    s is split on the first two single quotes.  When both are present the
    three resulting pieces are returned as a list; otherwise the result is
    the tuple (None, None, s).
    """
    pieces = s.split(TICK, 2)
    return pieces if len(pieces) > 2 else (None, None, s)
256
257
def encode_rfc2231(s, charset=None, language=None):
    """Encode string according to RFC 2231.

    If neither charset nor language is given, then s is returned as-is
    (apart from percent-encoding).  If charset is given but not language,
    the string is encoded using the empty string for language.
    """
    # Every character outside the unreserved set is percent-encoded, using
    # the given charset (ASCII when none is given) to obtain the bytes.
    quoted = urllib.parse.quote(s, safe='', encoding=charset or 'ascii')
    if charset is None and language is None:
        return quoted
    return "%s'%s'%s" % (charset, '' if language is None else language, quoted)
271
272
# Matches parameter names of the forms "name*", "name*N" and "name*N*",
# capturing the base name and the optional decimal section number N.
rfc2231_continuation = re.compile(
    r'^(?P<name>\w+)\*((?P<num>[0-9]+)\*?)?$',
    re.ASCII)
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000275
def decode_params(params):
    """Decode parameters list according to RFC 2231.

    params is a sequence of 2-tuples containing (param name, string value).

    The first entry is passed through untouched.  Every other plain
    parameter is unquoted and re-quoted; parameters whose names match the
    continuation pattern are grouped by base name, joined in numerical order
    and, if any segment was %-encoded, returned as a
    (charset, language, value) 3-tuple.
    """
    # Copy params so we don't mess with the original
    pending = params[:]
    first_name, first_value = pending.pop(0)
    decoded = [(first_name, first_value)]
    # Map parameter's name to a list of continuations.  The values are a
    # 3-tuple of the continuation number, the string value, and a flag
    # specifying whether a particular segment is %-encoded.
    continuations_by_name = {}
    for name, value in pending:
        encoded = name.endswith('*')
        value = unquote(value)
        mo = rfc2231_continuation.match(name)
        if mo is None:
            decoded.append((name, '"%s"' % quote(value)))
            continue
        name, num = mo.group('name', 'num')
        if num is not None:
            num = int(num)
        continuations_by_name.setdefault(name, []).append(
            (num, value, encoded))
    for name, continuations in continuations_by_name.items():
        # Sort by number
        continuations.sort()
        # And now append all values in numerical order, converting
        # %-encodings for the encoded segments.  If any of the
        # continuation names ends in a *, then the entire string, after
        # decoding segments and concatenating, must have the charset and
        # language specifiers at the beginning of the string.
        extended = False
        segments = []
        for num, s, encoded in continuations:
            if encoded:
                # Decode as "latin-1", so the characters in s directly
                # represent the percent-encoded octet values.
                # collapse_rfc2231_value treats this as an octet sequence.
                s = urllib.parse.unquote(s, encoding="latin-1")
                extended = True
            segments.append(s)
        value = quote(EMPTYSTRING.join(segments))
        if extended:
            charset, language, value = decode_rfc2231(value)
            decoded.append((name, (charset, language, '"%s"' % value)))
        else:
            decoded.append((name, '"%s"' % value))
    return decoded
331
def collapse_rfc2231_value(value, errors='replace',
                           fallback_charset='us-ascii'):
    """Collapse an RFC 2231 parameter value into a plain string.

    value is either a string, which is simply unquoted, or a
    (charset, language, text) 3-tuple.  For a tuple, text is turned back
    into bytes and decoded with charset using the given errors handler.  If
    charset is None, fallback_charset is used instead.  If the charset names
    no known codec, the unquoted text is returned.
    """
    if not isinstance(value, tuple) or len(value) != 3:
        return unquote(value)
    # While value comes to us as a unicode string, we need it to be a bytes
    # object.  We do not want bytes() normal utf-8 decoder, we want a straight
    # interpretation of the string as character bytes.
    charset, language, text = value
    if charset is None:
        # No charset could be extracted from the value (decode_rfc2231()
        # returns None for it in that case); str() would raise TypeError on
        # a None encoding, so decode with the caller-supplied fallback.
        charset = fallback_charset
    rawbytes = bytes(text, 'raw-unicode-escape')
    try:
        return str(rawbytes, charset, errors)
    except LookupError:
        # charset is not a known codec.
        return unquote(text)
R David Murrayd2d521e2012-05-25 23:22:59 -0400346
347
348#
349# datetime doesn't provide a localtime function yet, so provide one. Code
350# adapted from the patch in issue 9527. This may not be perfect, but it is
351# better than not having it.
352#
353
def localtime(dt=None, isdst=-1):
    """Return local time as an aware datetime object.

    If called without arguments, return current time.  Otherwise *dt*
    argument should be a datetime instance, and it is converted to the
    local time zone according to the system time zone database.  If *dt* is
    naive (that is, dt.tzinfo is None), it is assumed to be in local time.
    In this case, a positive or zero value for *isdst* causes localtime to
    presume initially that summer time (for example, Daylight Saving Time)
    is or is not (respectively) in effect for the specified time.  A
    negative value for *isdst* causes the localtime() function to attempt
    to divine whether summer time is in effect for the specified time.

    """
    if dt is None:
        utc_now = datetime.datetime.now(datetime.timezone.utc)
        return utc_now.astimezone()
    if dt.tzinfo is not None:
        return dt.astimezone()
    # We have a naive datetime.  Convert to a (localtime) timetuple and pass to
    # system mktime together with the isdst hint.  System mktime will return
    # seconds since epoch.
    hinted_tuple = dt.timetuple()[:-1] + (isdst,)
    seconds = time.mktime(hinted_tuple)
    localtm = time.localtime(seconds)
    try:
        delta = datetime.timedelta(seconds=localtm.tm_gmtoff)
        tz = datetime.timezone(delta, localtm.tm_zone)
    except AttributeError:
        # struct_time has no tm_gmtoff/tm_zone here.
        # Compute UTC offset and compare with the value implied by tm_isdst.
        # If the values match, use the zone name implied by tm_isdst.
        utc_naive = datetime.datetime(*time.gmtime(seconds)[:6])
        delta = dt - utc_naive
        dst = time.daylight and localtm.tm_isdst > 0
        gmtoff = -(time.altzone if dst else time.timezone)
        offsets_agree = delta == datetime.timedelta(seconds=gmtoff)
        if offsets_agree:
            tz = datetime.timezone(delta, time.tzname[dst])
        else:
            tz = datetime.timezone(delta)
    return dt.replace(tzinfo=tz)