Blame - Lib/email/utils.py - platform/external/python/cpython2

blob: 5771209c1be8e74432bd6005aa361971b784412c [file] [log] [blame]

Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	1	# Copyright (C) 2001-2007 Python Software Foundation
				2	# Author: Barry Warsaw
				3	# Contact: email-sig@python.org
				4
				5	"""Miscellaneous utilities."""
				6
				7	__all__ = [
				8	'collapse_rfc2231_value',
				9	'decode_params',
				10	'decode_rfc2231',
				11	'encode_rfc2231',
				12	'formataddr',
				13	'formatdate',
				14	'getaddresses',
				15	'make_msgid',
				16	'parseaddr',
				17	'parsedate',
				18	'parsedate_tz',
				19	'unquote',
				20	]
				21
				22	import os
				23	import re
				24	import time
				25	import base64
				26	import random
				27	import socket
				28	import urllib
				29	import warnings
				30	from io import StringIO
				31
				32	from email._parseaddr import quote
				33	from email._parseaddr import AddressList as _AddressList
				34	from email._parseaddr import mktime_tz
				35
				36	# We need workarounds for bugs in these methods in older Pythons (see below)
				37	from email._parseaddr import parsedate as _parsedate
				38	from email._parseaddr import parsedate_tz as _parsedate_tz
				39
				40	from quopri import decodestring as _qdecode
				41
				42	# Intrapackage imports
				43	from email.encoders import _bencode, _qencode
				44
				45	COMMASPACE = ', '
				46	EMPTYSTRING = ''
				47	UEMPTYSTRING = ''
				48	CRLF = '\r\n'
				49	TICK = "'"
				50
				51	specialsre = re.compile(r'[][\\()<>@,:;".]')
				52	escapesre = re.compile(r'[][\\()"]')
				53
				54
				55
				56	# Helpers
				57
				58	def _identity(s):
				59	return s
				60
				61
				62	def _bdecode(s):
				63	# We can't quite use base64.encodestring() since it tacks on a "courtesy
				64	# newline". Blech!
				65	if not s:
				66	return s
				67	value = base64.decodestring(s)
				68	if not s.endswith('\n') and value.endswith('\n'):
				69	return value[:-1]
				70	return value
				71
				72
				73
def formataddr(pair):
    """Build a header-ready address string from (realname, email_address).

    This is the inverse of parseaddr(): the result is suitable for an
    RFC 2822 From, To or Cc header.

    When the real name is false, the address is returned unmodified.
    Otherwise the name has its escapesre characters backslash-escaped, is
    wrapped in double quotes if it contains any specialsre character, and is
    followed by the address in angle brackets.
    """
    realname, email_address = pair
    if not realname:
        return email_address
    # Decide on quoting from the raw name, before any backslashes are added.
    wrapper = '"' if specialsre.search(realname) else ''
    escaped = escapesre.sub(r'\\\g<0>', realname)
    return '%s%s%s <%s>' % (wrapper, escaped, wrapper, email_address)
				90
				91
				92
				93	def getaddresses(fieldvalues):
				94	"""Return a list of (REALNAME, EMAIL) for each fieldvalue."""
				95	all = COMMASPACE.join(fieldvalues)
				96	a = _AddressList(all)
				97	return a.addresslist
				98
				99
				100
				101	ecre = re.compile(r'''
				102	=\? # literal =?
				103	(?P<charset>[^?]*?) # non-greedy up to the next ? is the charset
				104	\? # literal ?
				105	(?P<encoding>[qb]) # either a "q" or a "b", case insensitive
				106	\? # literal ?
				107	(?P<atom>.*?) # non-greedy up to the next ?= is the atom
				108	\?= # literal ?=
				109	''', re.VERBOSE \| re.IGNORECASE)
				110
				111
				112
				113	def formatdate(timeval=None, localtime=False, usegmt=False):
				114	"""Returns a date string as specified by RFC 2822, e.g.:
				115
				116	Fri, 09 Nov 2001 01:08:47 -0000
				117
				118	Optional timeval if given is a floating point time value as accepted by
				119	gmtime() and localtime(), otherwise the current time is used.
				120
				121	Optional localtime is a flag that when True, interprets timeval, and
				122	returns a date relative to the local timezone instead of UTC, properly
				123	taking daylight savings time into account.
				124
				125	Optional argument usegmt means that the timezone is written out as
				126	an ascii string, not numeric one (so "GMT" instead of "+0000"). This
				127	is needed for HTTP, and is only used when localtime==False.
				128	"""
				129	# Note: we cannot use strftime() because that honors the locale and RFC
				130	# 2822 requires that day and month names be the English abbreviations.
				131	if timeval is None:
				132	timeval = time.time()
				133	if localtime:
				134	now = time.localtime(timeval)
				135	# Calculate timezone offset, based on whether the local zone has
				136	# daylight savings time, and whether DST is in effect.
				137	if time.daylight and now[-1]:
				138	offset = time.altzone
				139	else:
				140	offset = time.timezone
				141	hours, minutes = divmod(abs(offset), 3600)
				142	# Remember offset is in seconds west of UTC, but the timezone is in
				143	# minutes east of UTC, so the signs differ.
				144	if offset > 0:
				145	sign = '-'
				146	else:
				147	sign = '+'
				148	zone = '%s%02d%02d' % (sign, hours, minutes // 60)
				149	else:
				150	now = time.gmtime(timeval)
				151	# Timezone offset is always -0000
				152	if usegmt:
				153	zone = 'GMT'
				154	else:
				155	zone = '-0000'
				156	return '%s, %02d %s %04d %02d:%02d:%02d %s' % (
				157	['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun'][now[6]],
				158	now[2],
				159	['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
				160	'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'][now[1] - 1],
				161	now[0], now[3], now[4], now[5],
				162	zone)
				163
				164
				165
				166	def make_msgid(idstring=None):
				167	"""Returns a string suitable for RFC 2822 compliant Message-ID, e.g:
				168
				169	<20020201195627.33539.96671@nightshade.la.mastaler.com>
				170
				171	Optional idstring if given is a string used to strengthen the
				172	uniqueness of the message id.
				173	"""
				174	timeval = time.time()
				175	utcdate = time.strftime('%Y%m%d%H%M%S', time.gmtime(timeval))
				176	pid = os.getpid()
				177	randint = random.randrange(100000)
				178	if idstring is None:
				179	idstring = ''
				180	else:
				181	idstring = '.' + idstring
				182	idhost = socket.getfqdn()
				183	msgid = '<%s.%s.%s%s@%s>' % (utcdate, pid, randint, idstring, idhost)
				184	return msgid
				185
				186
				187
				188	# These functions are in the standalone mimelib version only because they've
				189	# subsequently been fixed in the latest Python versions. We use this to worm
				190	# around broken older Pythons.
				191	def parsedate(data):
				192	if not data:
				193	return None
				194	return _parsedate(data)
				195
				196
				197	def parsedate_tz(data):
				198	if not data:
				199	return None
				200	return _parsedate_tz(data)
				201
				202
				203	def parseaddr(addr):
				204	addrs = _AddressList(addr).addresslist
				205	if not addrs:
				206	return '', ''
				207	return addrs[0]
				208
				209
				210	# rfc822.unquote() doesn't properly de-backslash-ify in Python pre-2.3.
				211	def unquote(str):
				212	"""Remove quotes from a string."""
				213	if len(str) > 1:
				214	if str.startswith('"') and str.endswith('"'):
				215	return str[1:-1].replace('\\\\', '\\').replace('\\"', '"')
				216	if str.startswith('<') and str.endswith('>'):
				217	return str[1:-1]
				218	return str
				219
				220
				221
				222	# RFC2231-related functions - parameter encoding and decoding
				223	def decode_rfc2231(s):
				224	"""Decode string according to RFC 2231"""
				225	parts = s.split(TICK, 2)
				226	if len(parts) <= 2:
				227	return None, None, s
				228	return parts
				229
				230
				231	def encode_rfc2231(s, charset=None, language=None):
				232	"""Encode string according to RFC 2231.
				233
				234	If neither charset nor language is given, then s is returned as-is. If
				235	charset is given but not language, the string is encoded using the empty
				236	string for language.
				237	"""
				238	import urllib
				239	s = urllib.quote(s, safe='')
				240	if charset is None and language is None:
				241	return s
				242	if language is None:
				243	language = ''
				244	return "%s'%s'%s" % (charset, language, s)
				245
				246
				247	rfc2231_continuation = re.compile(r'^(?P<name>\w+)\((?P<num>[0-9]+)\?)?$')
				248
				249	def decode_params(params):
				250	"""Decode parameters list according to RFC 2231.
				251
				252	params is a sequence of 2-tuples containing (param name, string value).
				253	"""
				254	# Copy params so we don't mess with the original
				255	params = params[:]
				256	new_params = []
				257	# Map parameter's name to a list of continuations. The values are a
				258	# 3-tuple of the continuation number, the string value, and a flag
				259	# specifying whether a particular segment is %-encoded.
				260	rfc2231_params = {}
				261	name, value = params.pop(0)
				262	new_params.append((name, value))
				263	while params:
				264	name, value = params.pop(0)
				265	if name.endswith('*'):
				266	encoded = True
				267	else:
				268	encoded = False
				269	value = unquote(value)
				270	mo = rfc2231_continuation.match(name)
				271	if mo:
				272	name, num = mo.group('name', 'num')
				273	if num is not None:
				274	num = int(num)
				275	rfc2231_params.setdefault(name, []).append((num, value, encoded))
				276	else:
				277	new_params.append((name, '"%s"' % quote(value)))
				278	if rfc2231_params:
				279	for name, continuations in rfc2231_params.items():
				280	value = []
				281	extended = False
				282	# Sort by number
				283	continuations.sort()
				284	# And now append all values in numerical order, converting
				285	# %-encodings for the encoded segments. If any of the
				286	# continuation names ends in a *, then the entire string, after
				287	# decoding segments and concatenating, must have the charset and
				288	# language specifiers at the beginning of the string.
				289	for num, s, encoded in continuations:
				290	if encoded:
				291	s = urllib.unquote(s)
				292	extended = True
				293	value.append(s)
				294	value = quote(EMPTYSTRING.join(value))
				295	if extended:
				296	charset, language, value = decode_rfc2231(value)
				297	new_params.append((name, (charset, language, '"%s"' % value)))
				298	else:
				299	new_params.append((name, '"%s"' % value))
				300	return new_params
				301
				302	def collapse_rfc2231_value(value, errors='replace',
				303	fallback_charset='us-ascii'):
				304	if not isinstance(value, tuple) or len(value) != 3:
				305	return unquote(value)
				306	# While value comes to us as a unicode string, we need it to be a bytes
				307	# object. We do not want bytes() normal utf-8 decoder, we want a straight
				308	# interpretation of the string as character bytes.
				309	charset, language, text = value
Guido van Rossum	9604e66	2007-08-30 03:46:43 +0000	[diff] [blame^]	310	rawbytes = bytes(text, 'raw-unicode-escape')
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	311	try:
				312	return str(rawbytes, charset, errors)
				313	except LookupError:
				314	# charset is not a known codec.
				315	return unquote(text)