blob: e786d265863eb81cb90dc6f5b81d7e36c6ce737c [file] [log] [blame]
Barry Warsaw24f79762004-05-09 03:55:11 +00001# Copyright (C) 2001-2004 Python Software Foundation
Barry Warsawbb113862004-10-03 03:16:19 +00002# Author: Barry Warsaw
3# Contact: email-sig@python.org
Barry Warsawba925802001-09-23 03:17:28 +00004
Barry Warsaw24f79762004-05-09 03:55:11 +00005"""Miscellaneous utilities."""
Barry Warsawba925802001-09-23 03:17:28 +00006
Barry Warsaw409a4c02002-04-10 21:01:31 +00007import os
Barry Warsaw24f79762004-05-09 03:55:11 +00008import re
9import time
10import base64
11import random
12import socket
Barry Warsaw409a4c02002-04-10 21:01:31 +000013import warnings
14from cStringIO import StringIO
Barry Warsawba925802001-09-23 03:17:28 +000015
Barry Warsaw030ddf72002-11-05 19:54:52 +000016from email._parseaddr import quote
17from email._parseaddr import AddressList as _AddressList
18from email._parseaddr import mktime_tz
Barry Warsaw409a4c02002-04-10 21:01:31 +000019
# We need workarounds for bugs in these methods in older Pythons (see below)
Barry Warsaw030ddf72002-11-05 19:54:52 +000021from email._parseaddr import parsedate as _parsedate
22from email._parseaddr import parsedate_tz as _parsedate_tz
Barry Warsawba925802001-09-23 03:17:28 +000023
Barry Warsaw24f79762004-05-09 03:55:11 +000024from quopri import decodestring as _qdecode
Barry Warsawba925802001-09-23 03:17:28 +000025
26# Intrapackage imports
Barry Warsaw21f77ac2002-06-02 19:07:16 +000027from email.Encoders import _bencode, _qencode
Barry Warsawba925802001-09-23 03:17:28 +000028
# Separator getaddresses() uses to join several field values into one string.
COMMASPACE = ', '
# Empty byte-string and unicode-string constants.
EMPTYSTRING = ''
UEMPTYSTRING = u''
# Line terminator that fix_eols() substitutes for a bare CR or a bare LF.
CRLF = '\r\n'

# Characters whose presence makes formataddr() wrap the real name in quotes.
specialsre = re.compile(r'[][\\()<>@,:;".]')
# Characters that formataddr() prefixes with a backslash in the real name.
escapesre = re.compile(r'[][\\()"]')
Barry Warsawba925802001-09-23 03:17:28 +000036
37
Barry Warsawe968ead2001-10-04 17:05:11 +000038
Barry Warsawba925802001-09-23 03:17:28 +000039# Helpers
40
41def _identity(s):
42 return s
43
44
45def _bdecode(s):
Barry Warsawba925802001-09-23 03:17:28 +000046 # We can't quite use base64.encodestring() since it tacks on a "courtesy
47 # newline". Blech!
48 if not s:
49 return s
Barry Warsawba925802001-09-23 03:17:28 +000050 value = base64.decodestring(s)
Barry Warsaw5bdb2be2002-09-28 20:49:57 +000051 if not s.endswith('\n') and value.endswith('\n'):
Barry Warsawba925802001-09-23 03:17:28 +000052 return value[:-1]
53 return value
54
55
Barry Warsawe968ead2001-10-04 17:05:11 +000056
def fix_eols(s):
    """Return s with every line ending normalized to CR LF.

    A line feed that is not preceded by a carriage return, and a carriage
    return that is not followed by a line feed, are each replaced by the
    two-character CR LF sequence.  Existing CR LF pairs are left alone.
    """
    # First pass: bare line feeds (no carriage return in front of them).
    lf_fixed = re.sub(r'(?<!\r)\n', CRLF, s)
    # Second pass: bare carriage returns (no line feed after them).
    return re.sub(r'\r(?!\n)', CRLF, lf_fixed)
64
65
66
def formataddr(pair):
    """Build an address header value from a (realname, email_address) pair.

    This is the inverse of parseaddr().  When the real name is false, the
    email address is returned on its own, unmodified.  Otherwise the result
    is 'realname <email_address>'; the real name is wrapped in double quotes
    if it contains any character matched by specialsre, and every character
    matched by escapesre is prefixed with a backslash.
    """
    name, address = pair
    if not name:
        return address
    if specialsre.search(name):
        quotes = '"'
    else:
        quotes = ''
    escaped = escapesre.sub(r'\\\g<0>', name)
    return '%s%s%s <%s>' % (quotes, escaped, quotes, address)
83
Barry Warsaw409a4c02002-04-10 21:01:31 +000084
85
def getaddresses(fieldvalues):
    """Return a list of (REALNAME, EMAIL) pairs found in fieldvalues.

    The field values are joined with COMMASPACE into a single string, which
    is then parsed as one address list.
    """
    joined = COMMASPACE.join(fieldvalues)
    return _AddressList(joined).addresslist
Barry Warsawba925802001-09-23 03:17:28 +000091
92
Barry Warsawe968ead2001-10-04 17:05:11 +000093
# Matches one encoded word of the form =?charset?encoding?atom?= and exposes
# its three parts as the named groups 'charset', 'encoding' and 'atom'.  The
# encoding letter is matched case-insensitively (re.IGNORECASE).
ecre = re.compile(r'''
  =\?                   # literal =?
  (?P<charset>[^?]*?)   # non-greedy up to the next ? is the charset
  \?                    # literal ?
  (?P<encoding>[qb])    # either a "q" or a "b", case insensitive
  \?                    # literal ?
  (?P<atom>.*?)         # non-greedy up to the next ?= is the atom
  \?=                   # literal ?=
  ''', re.VERBOSE | re.IGNORECASE)
103
104
Barry Warsawaa79f4d2001-11-09 16:59:56 +0000105
def formatdate(timeval=None, localtime=False):
    """Return a date string in the RFC 2822 layout, e.g.:

    Fri, 09 Nov 2001 01:08:47 -0000

    timeval, when given, is a floating point time value as accepted by
    time.gmtime() and time.localtime(); when it is None the current time
    is used.

    localtime is a flag: when true, timeval is broken down with
    time.localtime() and the zone field carries the local UTC offset
    (the DST offset when the zone has DST and it is in effect).  When
    false, time.gmtime() is used and the zone field is always '-0000'.
    """
    # strftime() is deliberately avoided: it honors the locale, while RFC
    # 2822 requires the English day and month abbreviations.
    day_names = ('Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun')
    month_names = ('Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                   'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec')
    if timeval is None:
        timeval = time.time()
    if not localtime:
        stamp = time.gmtime(timeval)
        zone = '-0000'
    else:
        stamp = time.localtime(timeval)
        # Use the DST offset only if the local zone defines DST and the
        # broken-down time says it is in effect (last field of the tuple).
        if time.daylight and stamp[-1]:
            west_seconds = time.altzone
        else:
            west_seconds = time.timezone
        hours, leftover_seconds = divmod(abs(west_seconds), 3600)
        # The time module counts seconds *west* of UTC, whereas the header
        # zone is expressed *east* of UTC, hence the inverted sign.
        if west_seconds > 0:
            sign = '-'
        else:
            sign = '+'
        zone = '%s%02d%02d' % (sign, hours, leftover_seconds // 60)
    year, month, day, hour, minute, second, weekday = stamp[:7]
    return '%s, %02d %s %04d %02d:%02d:%02d %s' % (
        day_names[weekday], day, month_names[month - 1],
        year, hour, minute, second, zone)
Barry Warsaw409a4c02002-04-10 21:01:31 +0000149
150
151
def make_msgid(idstring=None):
    """Return a string usable as an RFC 2822 Message-ID, e.g.:

    <20020201195627.33539.96671@nightshade.la.mastaler.com>

    The id is assembled from the current UTC time (YYYYMMDDHHMMSS), the
    process id, a random integer below 100000 and the host name reported by
    socket.getfqdn().  If idstring is given, it is appended to the local
    part after a dot to strengthen the uniqueness of the message id.
    """
    utcdate = time.strftime('%Y%m%d%H%M%S', time.gmtime(time.time()))
    if idstring is None:
        suffix = ''
    else:
        suffix = '.' + idstring
    return '<%s.%s.%s%s@%s>' % (
        utcdate, os.getpid(), random.randrange(100000), suffix,
        socket.getfqdn())
171
172
173
# These functions are in the standalone mimelib version only because they've
# subsequently been fixed in the latest Python versions.  We use this to work
# around broken older Pythons.
def parsedate(data):
    """Parse the date string data with email._parseaddr.parsedate().

    A false data value (None or the empty string) yields None without the
    underlying parser being called.
    """
    if data:
        return _parsedate(data)
    return None
181
182
def parsedate_tz(data):
    """Parse the date string data with email._parseaddr.parsedate_tz().

    A false data value (None or the empty string) yields None without the
    underlying parser being called.
    """
    if data:
        return _parsedate_tz(data)
    return None
187
188
def parseaddr(addr):
    """Return the first (realname, email_address) pair parsed from addr.

    addr is parsed as an address list; if the list comes back empty, the
    pair ('', '') is returned instead.
    """
    parsed = _AddressList(addr).addresslist
    if parsed:
        return parsed[0]
    return '', ''
Barry Warsaw12566a82002-06-29 05:58:04 +0000194
195
Barry Warsaw184d55a2002-09-11 02:22:48 +0000196# rfc822.unquote() doesn't properly de-backslash-ify in Python pre-2.3.
def unquote(str):
    """Remove quotes from a string.

    A string of at least two characters that starts and ends with a double
    quote loses those quotes, and inside it each escaped backslash and each
    escaped double quote is replaced by the bare character.  A string of at
    least two characters that starts with '<' and ends with '>' loses the
    angle brackets.  Anything else is returned unchanged.
    """
    if len(str) <= 1:
        return str
    if str.startswith('"') and str.endswith('"'):
        inner = str[1:-1]
        # Collapse escaped backslashes first, then escaped double quotes.
        inner = inner.replace('\\\\', '\\')
        return inner.replace('\\"', '"')
    if str.startswith('<') and str.endswith('>'):
        return str[1:-1]
    return str
205
206
Barry Warsaw12566a82002-06-29 05:58:04 +0000207
208# RFC2231-related functions - parameter encoding and decoding
def decode_rfc2231(s):
    """Decode string according to RFC 2231.

    s is split on its first two apostrophes into charset, language and a
    percent-encoded text.  The return value is the 3-tuple
    (charset, language, text) with the text percent-decoded.

    If s does not contain two apostrophes it cannot carry both a charset and
    a language field, so it is treated as a plain value: the whole of s is
    percent-decoded and returned as (None, None, text).  (Previously a value
    with exactly one apostrophe raised ValueError on unpacking.)
    """
    try:
        from urllib import unquote as _url_unquote
    except ImportError:
        # Newer Pythons moved the function into urllib.parse.
        from urllib.parse import unquote as _url_unquote
    parts = s.split("'", 2)
    if len(parts) < 3:
        return None, None, _url_unquote(s)
    charset, language, encoded = parts
    return charset, language, _url_unquote(encoded)
Barry Warsaw12566a82002-06-29 05:58:04 +0000217
218
def encode_rfc2231(s, charset=None, language=None):
    """Encode string according to RFC 2231.

    s is always percent-quoted, with no character treated as safe.  If
    neither charset nor language is given, the quoted string is returned on
    its own.  Otherwise the result has the form charset'language'quoted,
    where a charset or language that was not given is written as the empty
    string.
    """
    try:
        from urllib import quote as _url_quote
    except ImportError:
        # Newer Pythons moved the function into urllib.parse.
        from urllib.parse import quote as _url_quote
    s = _url_quote(s, safe='')
    if charset is None and language is None:
        return s
    # A missing field must be emitted as empty, never as the text "None".
    if charset is None:
        charset = ''
    if language is None:
        language = ''
    return "%s'%s'%s" % (charset, language, s)
Barry Warsaw12566a82002-06-29 05:58:04 +0000233
234
# Matches an RFC 2231 style parameter name: NAME*, NAME*N or NAME*N*, where
# N is a decimal number captured (when present) as the 'num' group.
rfc2231_continuation = re.compile(r'^(?P<name>\w+)\*((?P<num>[0-9]+)\*?)?$')

def decode_params(params):
    """Decode parameters list according to RFC 2231.

    params is a sequence of 2-tuples of (name, string value).  The first
    pair is copied to the result untouched.  Every later value is unquoted;
    a pair whose name does not match rfc2231_continuation is re-emitted as
    (name, quoted value), while matching pairs are collected per base name,
    sorted by their number, joined, passed to decode_rfc2231() and appended
    as (name, (charset, language, quoted value)).
    """
    first_name, first_value = params[0]
    decoded = [(first_name, first_value)]
    # Maps a parameter's base name to its list of (number, value) pieces.
    pieces_by_name = {}
    for name, value in params[1:]:
        value = unquote(value)
        mo = rfc2231_continuation.match(name)
        if not mo:
            decoded.append((name, '"%s"' % quote(value)))
            continue
        name, num = mo.group('name', 'num')
        if num is not None:
            num = int(num)
        pieces_by_name.setdefault(name, []).append((num, value))
    for name, pieces in pieces_by_name.items():
        # Put the pieces in number order before gluing them together.
        pieces.sort()
        joined = EMPTYSTRING.join([text for num, text in pieces])
        charset, language, value = decode_rfc2231(joined)
        decoded.append(
            (name, (charset, language, '"%s"' % quote(value))))
    return decoded
Barry Warsawbb113862004-10-03 03:16:19 +0000272
def collapse_rfc2231_value(value, errors='replace',
                           fallback_charset='us-ascii'):
    """Collapse a parameter value into a single string.

    If value is not a tuple, it is simply unquoted and returned.  If it is
    a tuple, its third item is unquoted and converted to unicode using the
    charset in its first item ('us-ascii' when that item is false) and the
    given errors mode.  Should that charset be unknown to Python
    (LookupError), fallback_charset is used for the conversion instead.
    """
    if not isinstance(value, tuple):
        return unquote(value)
    rawval = unquote(value[2])
    charset = value[0] or 'us-ascii'
    try:
        return unicode(rawval, charset, errors)
    except LookupError:
        # The charset is unknown to Python; retry with the fallback.
        return unicode(rawval, fallback_charset, errors)
284 return unquote(value)