Blame - Lib/email/utils.py - platform/external/python/cpython3

blob: 404cd9698c57fef392f024b4cfd6f28ff631d63c [file] [log] [blame]

Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame^]	1	# Copyright (C) 2001-2007 Python Software Foundation
				2	# Author: Barry Warsaw
				3	# Contact: email-sig@python.org
				4
				5	"""Miscellaneous utilities."""
				6
				7	__all__ = [
				8	'collapse_rfc2231_value',
				9	'decode_params',
				10	'decode_rfc2231',
				11	'encode_rfc2231',
				12	'formataddr',
				13	'formatdate',
				14	'getaddresses',
				15	'make_msgid',
				16	'parseaddr',
				17	'parsedate',
				18	'parsedate_tz',
				19	'unquote',
				20	]
				21
				22	import os
				23	import re
				24	import time
				25	import base64
				26	import random
				27	import socket
				28	import urllib
				29	import warnings
				30	from io import StringIO
				31
				32	from email._parseaddr import quote
				33	from email._parseaddr import AddressList as _AddressList
				34	from email._parseaddr import mktime_tz
				35
				36	# We need workarounds for bugs in these methods in older Pythons (see below)
				37	from email._parseaddr import parsedate as _parsedate
				38	from email._parseaddr import parsedate_tz as _parsedate_tz
				39
				40	from quopri import decodestring as _qdecode
				41
				42	# Intrapackage imports
				43	from email.encoders import _bencode, _qencode
				44
				45	COMMASPACE = ', '
				46	EMPTYSTRING = ''
				47	UEMPTYSTRING = ''
				48	CRLF = '\r\n'
				49	TICK = "'"
				50
				51	specialsre = re.compile(r'[][\\()<>@,:;".]')
				52	escapesre = re.compile(r'[][\\()"]')
				53
				54
				55
				56	# Helpers
				57
				58	def _identity(s):
				59	return s
				60
				61
				62	def _bdecode(s):
				63	# We can't quite use base64.encodestring() since it tacks on a "courtesy
				64	# newline". Blech!
				65	if not s:
				66	return s
				67	value = base64.decodestring(s)
				68	if not s.endswith('\n') and value.endswith('\n'):
				69	return value[:-1]
				70	return value
				71
				72
				73
				74	def fix_eols(s):
				75	"""Replace all line-ending characters with \r\n."""
				76	# Fix newlines with no preceding carriage return
				77	s = re.sub(r'(?<!\r)\n', CRLF, s)
				78	# Fix carriage returns with no following newline
				79	s = re.sub(r'\r(?!\n)', CRLF, s)
				80	return s
				81
				82
				83
				84	def formataddr(pair):
				85	"""The inverse of parseaddr(), this takes a 2-tuple of the form
				86	(realname, email_address) and returns the string value suitable
				87	for an RFC 2822 From, To or Cc header.
				88
				89	If the first element of pair is false, then the second element is
				90	returned unmodified.
				91	"""
				92	name, address = pair
				93	if name:
				94	quotes = ''
				95	if specialsre.search(name):
				96	quotes = '"'
				97	name = escapesre.sub(r'\\\g<0>', name)
				98	return '%s%s%s <%s>' % (quotes, name, quotes, address)
				99	return address
				100
				101
				102
				103	def getaddresses(fieldvalues):
				104	"""Return a list of (REALNAME, EMAIL) for each fieldvalue."""
				105	all = COMMASPACE.join(fieldvalues)
				106	a = _AddressList(all)
				107	return a.addresslist
				108
				109
				110
				111	ecre = re.compile(r'''
				112	=\? # literal =?
				113	(?P<charset>[^?]*?) # non-greedy up to the next ? is the charset
				114	\? # literal ?
				115	(?P<encoding>[qb]) # either a "q" or a "b", case insensitive
				116	\? # literal ?
				117	(?P<atom>.*?) # non-greedy up to the next ?= is the atom
				118	\?= # literal ?=
				119	''', re.VERBOSE \| re.IGNORECASE)
				120
				121
				122
				123	def formatdate(timeval=None, localtime=False, usegmt=False):
				124	"""Returns a date string as specified by RFC 2822, e.g.:
				125
				126	Fri, 09 Nov 2001 01:08:47 -0000
				127
				128	Optional timeval if given is a floating point time value as accepted by
				129	gmtime() and localtime(), otherwise the current time is used.
				130
				131	Optional localtime is a flag that when True, interprets timeval, and
				132	returns a date relative to the local timezone instead of UTC, properly
				133	taking daylight savings time into account.
				134
				135	Optional argument usegmt means that the timezone is written out as
				136	an ascii string, not numeric one (so "GMT" instead of "+0000"). This
				137	is needed for HTTP, and is only used when localtime==False.
				138	"""
				139	# Note: we cannot use strftime() because that honors the locale and RFC
				140	# 2822 requires that day and month names be the English abbreviations.
				141	if timeval is None:
				142	timeval = time.time()
				143	if localtime:
				144	now = time.localtime(timeval)
				145	# Calculate timezone offset, based on whether the local zone has
				146	# daylight savings time, and whether DST is in effect.
				147	if time.daylight and now[-1]:
				148	offset = time.altzone
				149	else:
				150	offset = time.timezone
				151	hours, minutes = divmod(abs(offset), 3600)
				152	# Remember offset is in seconds west of UTC, but the timezone is in
				153	# minutes east of UTC, so the signs differ.
				154	if offset > 0:
				155	sign = '-'
				156	else:
				157	sign = '+'
				158	zone = '%s%02d%02d' % (sign, hours, minutes // 60)
				159	else:
				160	now = time.gmtime(timeval)
				161	# Timezone offset is always -0000
				162	if usegmt:
				163	zone = 'GMT'
				164	else:
				165	zone = '-0000'
				166	return '%s, %02d %s %04d %02d:%02d:%02d %s' % (
				167	['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun'][now[6]],
				168	now[2],
				169	['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
				170	'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'][now[1] - 1],
				171	now[0], now[3], now[4], now[5],
				172	zone)
				173
				174
				175
				176	def make_msgid(idstring=None):
				177	"""Returns a string suitable for RFC 2822 compliant Message-ID, e.g:
				178
				179	<20020201195627.33539.96671@nightshade.la.mastaler.com>
				180
				181	Optional idstring if given is a string used to strengthen the
				182	uniqueness of the message id.
				183	"""
				184	timeval = time.time()
				185	utcdate = time.strftime('%Y%m%d%H%M%S', time.gmtime(timeval))
				186	pid = os.getpid()
				187	randint = random.randrange(100000)
				188	if idstring is None:
				189	idstring = ''
				190	else:
				191	idstring = '.' + idstring
				192	idhost = socket.getfqdn()
				193	msgid = '<%s.%s.%s%s@%s>' % (utcdate, pid, randint, idstring, idhost)
				194	return msgid
				195
				196
				197
				198	# These functions are in the standalone mimelib version only because they've
				199	# subsequently been fixed in the latest Python versions. We use this to work
				200	# around broken older Pythons.
				201	def parsedate(data):
				202	if not data:
				203	return None
				204	return _parsedate(data)
				205
				206
				207	def parsedate_tz(data):
				208	if not data:
				209	return None
				210	return _parsedate_tz(data)
				211
				212
				213	def parseaddr(addr):
				214	addrs = _AddressList(addr).addresslist
				215	if not addrs:
				216	return '', ''
				217	return addrs[0]
				218
				219
				220	# rfc822.unquote() doesn't properly de-backslash-ify in Python pre-2.3.
				221	def unquote(str):
				222	"""Remove quotes from a string."""
				223	if len(str) > 1:
				224	if str.startswith('"') and str.endswith('"'):
				225	return str[1:-1].replace('\\\\', '\\').replace('\\"', '"')
				226	if str.startswith('<') and str.endswith('>'):
				227	return str[1:-1]
				228	return str
				229
				230
				231
				232	# RFC2231-related functions - parameter encoding and decoding
				233	def decode_rfc2231(s):
				234	"""Decode string according to RFC 2231"""
				235	parts = s.split(TICK, 2)
				236	if len(parts) <= 2:
				237	return None, None, s
				238	return parts
				239
				240
				241	def encode_rfc2231(s, charset=None, language=None):
				242	"""Encode string according to RFC 2231.
				243
				244	If neither charset nor language is given, then s is returned as-is. If
				245	charset is given but not language, the string is encoded using the empty
				246	string for language.
				247	"""
				248	import urllib
				249	s = urllib.quote(s, safe='')
				250	if charset is None and language is None:
				251	return s
				252	if language is None:
				253	language = ''
				254	return "%s'%s'%s" % (charset, language, s)
				255
				256
				257	rfc2231_continuation = re.compile(r'^(?P<name>\w+)\((?P<num>[0-9]+)\?)?$')
				258
				259	def decode_params(params):
				260	"""Decode parameters list according to RFC 2231.
				261
				262	params is a sequence of 2-tuples containing (param name, string value).
				263	"""
				264	# Copy params so we don't mess with the original
				265	params = params[:]
				266	new_params = []
				267	# Map parameter's name to a list of continuations. The values are a
				268	# 3-tuple of the continuation number, the string value, and a flag
				269	# specifying whether a particular segment is %-encoded.
				270	rfc2231_params = {}
				271	name, value = params.pop(0)
				272	new_params.append((name, value))
				273	while params:
				274	name, value = params.pop(0)
				275	if name.endswith('*'):
				276	encoded = True
				277	else:
				278	encoded = False
				279	value = unquote(value)
				280	mo = rfc2231_continuation.match(name)
				281	if mo:
				282	name, num = mo.group('name', 'num')
				283	if num is not None:
				284	num = int(num)
				285	rfc2231_params.setdefault(name, []).append((num, value, encoded))
				286	else:
				287	new_params.append((name, '"%s"' % quote(value)))
				288	if rfc2231_params:
				289	for name, continuations in rfc2231_params.items():
				290	value = []
				291	extended = False
				292	# Sort by number
				293	continuations.sort()
				294	# And now append all values in numerical order, converting
				295	# %-encodings for the encoded segments. If any of the
				296	# continuation names ends in a *, then the entire string, after
				297	# decoding segments and concatenating, must have the charset and
				298	# language specifiers at the beginning of the string.
				299	for num, s, encoded in continuations:
				300	if encoded:
				301	s = urllib.unquote(s)
				302	extended = True
				303	value.append(s)
				304	value = quote(EMPTYSTRING.join(value))
				305	if extended:
				306	charset, language, value = decode_rfc2231(value)
				307	new_params.append((name, (charset, language, '"%s"' % value)))
				308	else:
				309	new_params.append((name, '"%s"' % value))
				310	return new_params
				311
				312	def collapse_rfc2231_value(value, errors='replace',
				313	fallback_charset='us-ascii'):
				314	if not isinstance(value, tuple) or len(value) != 3:
				315	return unquote(value)
				316	# While value comes to us as a unicode string, we need it to be a bytes
				317	# object. We do not want bytes() normal utf-8 decoder, we want a straight
				318	# interpretation of the string as character bytes.
				319	charset, language, text = value
				320	rawbytes = bytes(ord(c) for c in text)
				321	try:
				322	return str(rawbytes, charset, errors)
				323	except LookupError:
				324	# charset is not a known codec.
				325	return unquote(text)