blob: 9ba760116153ad2fe0fc97179ed22b0a895c787a [file] [log] [blame]
Barry Warsaw24f79762004-05-09 03:55:11 +00001# Copyright (C) 2001-2004 Python Software Foundation
Barry Warsawbb113862004-10-03 03:16:19 +00002# Author: Barry Warsaw
3# Contact: email-sig@python.org
Barry Warsawba925802001-09-23 03:17:28 +00004
Barry Warsaw24f79762004-05-09 03:55:11 +00005"""Miscellaneous utilities."""
Barry Warsawba925802001-09-23 03:17:28 +00006
Barry Warsaw409a4c02002-04-10 21:01:31 +00007import os
Barry Warsaw24f79762004-05-09 03:55:11 +00008import re
9import time
10import base64
11import random
12import socket
Barry Warsaw409a4c02002-04-10 21:01:31 +000013import warnings
14from cStringIO import StringIO
Barry Warsawba925802001-09-23 03:17:28 +000015
Barry Warsaw030ddf72002-11-05 19:54:52 +000016from email._parseaddr import quote
17from email._parseaddr import AddressList as _AddressList
18from email._parseaddr import mktime_tz
Barry Warsaw409a4c02002-04-10 21:01:31 +000019
# We need workarounds for bugs in these methods in older Pythons (see below)
Barry Warsaw030ddf72002-11-05 19:54:52 +000021from email._parseaddr import parsedate as _parsedate
22from email._parseaddr import parsedate_tz as _parsedate_tz
Barry Warsawba925802001-09-23 03:17:28 +000023
Barry Warsaw24f79762004-05-09 03:55:11 +000024from quopri import decodestring as _qdecode
Barry Warsawba925802001-09-23 03:17:28 +000025
26# Intrapackage imports
Barry Warsaw21f77ac2002-06-02 19:07:16 +000027from email.Encoders import _bencode, _qencode
Barry Warsawba925802001-09-23 03:17:28 +000028
# Separator used by getaddresses() to join several header field values.
COMMASPACE = ', '
EMPTYSTRING = ''
UEMPTYSTRING = u''
# Line terminator that fix_eols() normalizes every line ending to.
CRLF = '\r\n'

# Characters whose presence in a real name makes formataddr() wrap the name
# in double quotes.
specialsre = re.compile(r'[][\\()<>@,:;".]')
# Characters that formataddr() prefixes with a backslash inside a real name.
escapesre = re.compile(r'[][\\()"]')
Barry Warsawba925802001-09-23 03:17:28 +000036
37
Barry Warsawe968ead2001-10-04 17:05:11 +000038
Barry Warsawba925802001-09-23 03:17:28 +000039# Helpers
40
41def _identity(s):
42 return s
43
44
45def _bdecode(s):
Barry Warsawba925802001-09-23 03:17:28 +000046 # We can't quite use base64.encodestring() since it tacks on a "courtesy
47 # newline". Blech!
48 if not s:
49 return s
Barry Warsawba925802001-09-23 03:17:28 +000050 value = base64.decodestring(s)
Barry Warsaw5bdb2be2002-09-28 20:49:57 +000051 if not s.endswith('\n') and value.endswith('\n'):
Barry Warsawba925802001-09-23 03:17:28 +000052 return value[:-1]
53 return value
54
55
Barry Warsawe968ead2001-10-04 17:05:11 +000056
def fix_eols(s):
    r"""Return s with every line ending normalized to CRLF.

    A bare \n (one not preceded by \r) and a bare \r (one not followed by
    \n) are each replaced by the module's CRLF constant; existing \r\n
    pairs are left alone.
    """
    bare_newline = r'(?<!\r)\n'
    bare_return = r'\r(?!\n)'
    # Newlines are fixed first; the pairs that pass creates are already
    # complete, so the second pass only touches lone carriage returns.
    newlines_fixed = re.sub(bare_newline, CRLF, s)
    return re.sub(bare_return, CRLF, newlines_fixed)
64
65
66
def formataddr(pair):
    """Build an address header value from a (realname, email_address) pair.

    This is the inverse of parseaddr().  The result is suitable for an
    RFC 2822 From, To or Cc header.

    When the real name is false, the email address is returned unmodified.
    Otherwise the result is 'realname <email_address>', with the real name
    wrapped in double quotes if it matches specialsre, and with every
    character matched by escapesre preceded by a backslash.
    """
    realname, email_address = pair
    if not realname:
        return email_address
    # The quoting decision is made on the name as given, before escaping.
    if specialsre.search(realname):
        wrapper = '"'
    else:
        wrapper = ''
    escaped = escapesre.sub(r'\\\g<0>', realname)
    return '%s%s%s <%s>' % (wrapper, escaped, wrapper, email_address)
83
Barry Warsaw409a4c02002-04-10 21:01:31 +000084
85
def getaddresses(fieldvalues):
    """Return a list of (REALNAME, EMAIL) for each fieldvalue.

    All field values are joined with COMMASPACE and parsed together as a
    single address list.
    """
    joined = COMMASPACE.join(fieldvalues)
    return _AddressList(joined).addresslist
Barry Warsawba925802001-09-23 03:17:28 +000091
92
Barry Warsawe968ead2001-10-04 17:05:11 +000093
# Matches a single encoded word of the form =?charset?encoding?atom?= where
# encoding is "q" or "b" (matched case-insensitively).  The three pieces are
# available as the named groups 'charset', 'encoding' and 'atom'.
ecre = re.compile(r'''
  =\?                   # literal =?
  (?P<charset>[^?]*?)   # non-greedy up to the next ? is the charset
  \?                    # literal ?
  (?P<encoding>[qb])    # either a "q" or a "b", case insensitive
  \?                    # literal ?
  (?P<atom>.*?)         # non-greedy up to the next ?= is the atom
  \?=                   # literal ?=
  ''', re.VERBOSE | re.IGNORECASE)
103
104
Barry Warsawaa79f4d2001-11-09 16:59:56 +0000105
def formatdate(timeval=None, localtime=False, usegmt=False):
    """Return a date string in the format specified by RFC 2822, e.g.:

    Fri, 09 Nov 2001 01:08:47 -0000

    timeval, if given, is a floating point time value as accepted by
    time.gmtime() and time.localtime(); when it is None the current time
    is used.

    localtime is a flag; when true, timeval is interpreted in the local
    timezone and the zone is written as a numeric +HHMM/-HHMM offset that
    takes daylight savings time into account.  When false the date is
    expressed in UTC.

    usegmt only matters when localtime is false: the zone is then written
    as the ascii string "GMT" instead of the numeric "-0000" (this form is
    needed for HTTP).
    """
    # strftime() is deliberately avoided: it honors the locale, whereas RFC
    # 2822 requires the English day and month abbreviations.
    day_names = ('Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun')
    month_names = ('Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                   'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec')
    if timeval is None:
        timeval = time.time()
    if not localtime:
        now = time.gmtime(timeval)
        if usegmt:
            zone = 'GMT'
        else:
            zone = '-0000'
    else:
        now = time.localtime(timeval)
        # Use the DST offset only if the local zone defines DST and the
        # DST flag (last field of the time tuple) is set for this time.
        if time.daylight and now[-1]:
            offset = time.altzone
        else:
            offset = time.timezone
        hours, leftover_seconds = divmod(abs(offset), 3600)
        # offset counts seconds west of UTC while the printed zone counts
        # east of UTC, so a positive offset prints with a minus sign.
        if offset > 0:
            sign = '-'
        else:
            sign = '+'
        zone = '%s%02d%02d' % (sign, hours, leftover_seconds // 60)
    year, month, day, hour, minute, second, weekday = now[:7]
    return '%s, %02d %s %04d %02d:%02d:%02d %s' % (
        day_names[weekday],
        day,
        month_names[month - 1],
        year, hour, minute, second,
        zone)
Barry Warsaw409a4c02002-04-10 21:01:31 +0000156
157
158
def make_msgid(idstring=None):
    """Return a string suitable for an RFC 2822 compliant Message-ID, e.g:

    <20020201195627.33539.96671@nightshade.la.mastaler.com>

    The id is built from the current UTC time, the process id, a random
    integer below 100000 and the fully qualified host name.  If idstring
    is given, it is appended (after a dot) to the part before the "@" to
    strengthen the uniqueness of the message id.
    """
    stamp = time.strftime('%Y%m%d%H%M%S', time.gmtime(time.time()))
    process_id = os.getpid()
    nonce = random.randrange(100000)
    suffix = ''
    if idstring is not None:
        suffix = '.' + idstring
    host = socket.getfqdn()
    return '<%s.%s.%s%s@%s>' % (stamp, process_id, nonce, suffix, host)
178
179
180
181# These functions are in the standalone mimelib version only because they've
182# subsequently been fixed in the latest Python versions. We use this to worm
183# around broken older Pythons.
def parsedate(data):
    """Parse a date string, returning None for false input.

    Any false value (None, the empty string, ...) yields None without
    calling the underlying parser; everything else is handed to
    email._parseaddr.parsedate and its result is returned.
    """
    if data:
        return _parsedate(data)
    return None
188
189
def parsedate_tz(data):
    """Parse a date string including its timezone, returning None for
    false input.

    Any false value (None, the empty string, ...) yields None without
    calling the underlying parser; everything else is handed to
    email._parseaddr.parsedate_tz and its result is returned.
    """
    if data:
        return _parsedate_tz(data)
    return None
194
195
def parseaddr(addr):
    """Parse addr and return its first address as a (realname, email) pair.

    If parsing produces no addresses at all, the pair ('', '') is
    returned instead.
    """
    parsed = _AddressList(addr).addresslist
    if parsed:
        return parsed[0]
    return '', ''
Barry Warsaw12566a82002-06-29 05:58:04 +0000201
202
Barry Warsaw184d55a2002-09-11 02:22:48 +0000203# rfc822.unquote() doesn't properly de-backslash-ify in Python pre-2.3.
def unquote(str):
    """Remove quotes from a string.

    A string of at least two characters that starts and ends with a
    double quote loses those quotes, and the backslash escapes \\\\ and
    \\" inside it are collapsed to a single backslash and a bare quote.
    A string enclosed in <angle brackets> just loses the brackets.
    Anything else is returned unchanged.
    """
    if len(str) <= 1:
        return str
    if str.startswith('"') and str.endswith('"'):
        inner = str[1:-1]
        # Collapse escaped backslashes first, then escaped quotes.
        inner = inner.replace('\\\\', '\\')
        return inner.replace('\\"', '"')
    if str.startswith('<') and str.endswith('>'):
        return str[1:-1]
    return str
212
213
Barry Warsaw12566a82002-06-29 05:58:04 +0000214
215# RFC2231-related functions - parameter encoding and decoding
216def decode_rfc2231(s):
217 """Decode string according to RFC 2231"""
218 import urllib
Barry Warsaw8e1e7f52003-03-07 22:46:41 +0000219 parts = s.split("'", 2)
220 if len(parts) == 1:
Barry Warsaw0b6f0d82003-08-19 03:49:34 +0000221 return None, None, urllib.unquote(s)
Barry Warsaw8e1e7f52003-03-07 22:46:41 +0000222 charset, language, s = parts
223 return charset, language, urllib.unquote(s)
Barry Warsaw12566a82002-06-29 05:58:04 +0000224
225
226def encode_rfc2231(s, charset=None, language=None):
Barry Warsaw0ebc5c92002-10-01 00:44:13 +0000227 """Encode string according to RFC 2231.
228
229 If neither charset nor language is given, then s is returned as-is. If
230 charset is given but not language, the string is encoded using the empty
231 string for language.
232 """
Barry Warsaw12566a82002-06-29 05:58:04 +0000233 import urllib
234 s = urllib.quote(s, safe='')
235 if charset is None and language is None:
236 return s
Barry Warsaw0ebc5c92002-10-01 00:44:13 +0000237 if language is None:
238 language = ''
239 return "%s'%s'%s" % (charset, language, s)
Barry Warsaw12566a82002-06-29 05:58:04 +0000240
241
# Matches a parameter name that carries a trailing "*" marker: a word
# ('name' group) followed by "*", optionally followed by a decimal number
# ('num' group) and an optional further "*".  decode_params() uses it to
# spot parameters that need RFC 2231 decoding; 'num' is None when the
# name has no number.
rfc2231_continuation = re.compile(r'^(?P<name>\w+)\*((?P<num>[0-9]+)\*?)?$')
243
def decode_params(params):
    """Decode parameters list according to RFC 2231.

    params is a sequence of 2-tuples of the form (name, string value).
    The first pair is copied to the result as-is.  For each remaining
    pair the value is unquoted, and then:

    - if the name does not match rfc2231_continuation, the pair is
      emitted as (name, '"quoted value"');
    - otherwise the value is collected under the bare name together with
      its section number.  Once all pairs have been seen, the pieces of
      each such name are sorted, joined and run through decode_rfc2231(),
      and emitted as (name, (charset, language, '"quoted value"')).

    Returns the new list of parameters.
    """
    # The leading entry is passed through untouched.
    first_name, first_value = params[0]
    decoded = [(first_name, first_value)]
    # Maps a bare parameter name to its list of (number, value) pieces.
    pieces_by_name = {}
    for name, value in params[1:]:
        value = unquote(value)
        mo = rfc2231_continuation.match(name)
        if not mo:
            decoded.append((name, '"%s"' % quote(value)))
            continue
        name, num = mo.group('name', 'num')
        if num is not None:
            num = int(num)
        pieces_by_name.setdefault(name, []).append((num, value))
    for name, pieces in pieces_by_name.items():
        # Put the pieces in section-number order before gluing them.
        pieces.sort()
        joined = EMPTYSTRING.join([piece for num, piece in pieces])
        charset, language, text = decode_rfc2231(joined)
        decoded.append(
            (name, (charset, language, '"%s"' % quote(text))))
    return decoded
Barry Warsawbb113862004-10-03 03:16:19 +0000279
def collapse_rfc2231_value(value, errors='replace',
                           fallback_charset='us-ascii'):
    """Collapse a parameter value into a single unquoted string.

    If value is a tuple, element 0 is taken as the charset ('us-ascii'
    when false) and element 2 as the raw text.  The raw text is unquoted
    and converted to unicode using that charset and the given errors
    policy.  If Python does not know the charset (LookupError), the
    conversion is retried with fallback_charset.

    Any non-tuple value is simply returned unquoted.
    """
    if not isinstance(value, tuple):
        return unquote(value)
    rawval = unquote(value[2])
    charset = value[0] or 'us-ascii'
    try:
        collapsed = unicode(rawval, charset, errors)
    except LookupError:
        # XXX charset is unknown to Python.
        collapsed = unicode(rawval, fallback_charset, errors)
    return collapsed