#19662: add decode_data to smtpd so you can get at DATA in bytes form.
Otherwise smtpd is restricted to 7-bit-clean data: even if the
incoming data is actually UTF-8, decoding it before the message is
parsed will often break things.
Patch by Maciej Szulik, with some adjustments (mostly the warning
support).
diff --git a/Lib/smtpd.py b/Lib/smtpd.py
index 1fa157a..569b42e 100755
--- a/Lib/smtpd.py
+++ b/Lib/smtpd.py
@@ -98,7 +98,6 @@
DEBUGSTREAM = Devnull()
NEWLINE = '\n'
-EMPTYSTRING = ''
COMMASPACE = ', '
DATA_SIZE_DEFAULT = 33554432
@@ -122,12 +121,28 @@
max_command_size_limit = max(command_size_limits.values())
def __init__(self, server, conn, addr, data_size_limit=DATA_SIZE_DEFAULT,
- map=None):
+ map=None, decode_data=None):
asynchat.async_chat.__init__(self, conn, map=map)
self.smtp_server = server
self.conn = conn
self.addr = addr
self.data_size_limit = data_size_limit
+ if decode_data is None:
+ warn("The decode_data default of True will change to False in 3.6;"
+ " specify an explicit value for this keyword",
+ DeprecationWarning, 2)
+ decode_data = True
+ self._decode_data = decode_data
+ if decode_data:
+ self._emptystring = ''
+ self._linesep = '\r\n'
+ self._dotsep = '.'
+ self._newline = NEWLINE
+ else:
+ self._emptystring = b''
+ self._linesep = b'\r\n'
+ self._dotsep = b'.'
+ self._newline = b'\n'
self.received_lines = []
self.smtp_state = self.COMMAND
self.seen_greeting = ''
@@ -287,11 +302,14 @@
return
elif limit:
self.num_bytes += len(data)
- self.received_lines.append(str(data, "utf-8"))
+ if self._decode_data:
+ self.received_lines.append(str(data, 'utf-8'))
+ else:
+ self.received_lines.append(data)
# Implementation of base class abstract method
def found_terminator(self):
- line = EMPTYSTRING.join(self.received_lines)
+ line = self._emptystring.join(self.received_lines)
print('Data:', repr(line), file=DEBUGSTREAM)
self.received_lines = []
if self.smtp_state == self.COMMAND:
@@ -300,6 +318,8 @@
self.push('500 Error: bad syntax')
return
method = None
+ if not self._decode_data:
+ line = str(line, 'utf-8')
i = line.find(' ')
if i < 0:
command = line.upper()
@@ -330,12 +350,12 @@
# Remove extraneous carriage returns and de-transparency according
# to RFC 5321, Section 4.5.2.
data = []
- for text in line.split('\r\n'):
- if text and text[0] == '.':
+ for text in line.split(self._linesep):
+ if text.startswith(self._dotsep):  # bytes[0] is an int
data.append(text[1:])
else:
data.append(text)
- self.received_data = NEWLINE.join(data)
+ self.received_data = self._newline.join(data)
status = self.smtp_server.process_message(self.peer,
self.mailfrom,
self.rcpttos,
@@ -577,10 +597,17 @@
channel_class = SMTPChannel
def __init__(self, localaddr, remoteaddr,
- data_size_limit=DATA_SIZE_DEFAULT, map=None):
+ data_size_limit=DATA_SIZE_DEFAULT, map=None,
+ decode_data=None):
self._localaddr = localaddr
self._remoteaddr = remoteaddr
self.data_size_limit = data_size_limit
+ if decode_data is None:
+ warn("The decode_data default of True will change to False in 3.6;"
+ " specify an explicit value for this keyword",
+ DeprecationWarning, 2)
+ decode_data = True
+ self._decode_data = decode_data
asyncore.dispatcher.__init__(self, map=map)
try:
self.create_socket(socket.AF_INET, socket.SOCK_STREAM)
@@ -599,7 +626,7 @@
def handle_accepted(self, conn, addr):
print('Incoming connection from %s' % repr(addr), file=DEBUGSTREAM)
channel = self.channel_class(self, conn, addr, self.data_size_limit,
- self._map)
+ self._map, self._decode_data)
# API for "doing something useful with the message"
def process_message(self, peer, mailfrom, rcpttos, data):
diff --git a/Lib/test/test_smtpd.py b/Lib/test/test_smtpd.py
index 93f14c4..db1f52b 100644
--- a/Lib/test/test_smtpd.py
+++ b/Lib/test/test_smtpd.py
@@ -7,13 +7,18 @@
class DummyServer(smtpd.SMTPServer):
- def __init__(self, localaddr, remoteaddr):
- smtpd.SMTPServer.__init__(self, localaddr, remoteaddr)
+ def __init__(self, localaddr, remoteaddr, decode_data=True):
+ smtpd.SMTPServer.__init__(self, localaddr, remoteaddr,
+ decode_data=decode_data)
self.messages = []
+ if decode_data:
+ self.return_status = 'return status'
+ else:
+ self.return_status = b'return status'
def process_message(self, peer, mailfrom, rcpttos, data):
self.messages.append((peer, mailfrom, rcpttos, data))
- if data == 'return status':
+ if data == self.return_status:
return '250 Okish'
@@ -31,9 +36,9 @@
smtpd.socket = asyncore.socket = mock_socket
def test_process_message_unimplemented(self):
- server = smtpd.SMTPServer('a', 'b')
+ server = smtpd.SMTPServer('a', 'b', decode_data=True)
conn, addr = server.accept()
- channel = smtpd.SMTPChannel(server, conn, addr)
+ channel = smtpd.SMTPChannel(server, conn, addr, decode_data=True)
def write_line(line):
channel.socket.queue_recv(line)
@@ -45,6 +50,10 @@
write_line(b'DATA')
self.assertRaises(NotImplementedError, write_line, b'spam\r\n.\r\n')
+ def test_decode_data_default_warns(self):
+ with self.assertWarns(DeprecationWarning):
+ smtpd.SMTPServer('a', 'b')
+
def tearDown(self):
asyncore.close_all()
asyncore.socket = smtpd.socket = socket
@@ -57,7 +66,8 @@
self.debug = smtpd.DEBUGSTREAM = io.StringIO()
self.server = DummyServer('a', 'b')
conn, addr = self.server.accept()
- self.channel = smtpd.SMTPChannel(self.server, conn, addr)
+ self.channel = smtpd.SMTPChannel(self.server, conn, addr,
+ decode_data=True)
def tearDown(self):
asyncore.close_all()
@@ -502,6 +512,12 @@
with support.check_warnings(('', DeprecationWarning)):
self.channel._SMTPChannel__addr = 'spam'
+ def test_decode_data_default_warning(self):
+ server = DummyServer('a', 'b')
+ conn, addr = server.accept()
+ with self.assertWarns(DeprecationWarning):
+ smtpd.SMTPChannel(server, conn, addr)
+
class SMTPDChannelWithDataSizeLimitTest(unittest.TestCase):
@@ -512,7 +528,8 @@
self.server = DummyServer('a', 'b')
conn, addr = self.server.accept()
# Set DATA size limit to 32 bytes for easy testing
- self.channel = smtpd.SMTPChannel(self.server, conn, addr, 32)
+ self.channel = smtpd.SMTPChannel(self.server, conn, addr, 32,
+ decode_data=True)
def tearDown(self):
asyncore.close_all()
@@ -553,5 +570,92 @@
b'552 Error: Too much mail data\r\n')
+class SMTPDChannelWithDecodeDataFalse(unittest.TestCase):
+
+ def setUp(self):
+ smtpd.socket = asyncore.socket = mock_socket
+ self.old_debugstream = smtpd.DEBUGSTREAM
+ self.debug = smtpd.DEBUGSTREAM = io.StringIO()
+ self.server = DummyServer('a', 'b', decode_data=False)
+ conn, addr = self.server.accept()
+ # Set decode_data to False
+ self.channel = smtpd.SMTPChannel(self.server, conn, addr,
+ decode_data=False)
+
+ def tearDown(self):
+ asyncore.close_all()
+ asyncore.socket = smtpd.socket = socket
+ smtpd.DEBUGSTREAM = self.old_debugstream
+
+ def write_line(self, line):
+ self.channel.socket.queue_recv(line)
+ self.channel.handle_read()
+
+ def test_ascii_data(self):
+ self.write_line(b'HELO example')
+ self.write_line(b'MAIL From:eggs@example')
+ self.write_line(b'RCPT To:spam@example')
+ self.write_line(b'DATA')
+ self.write_line(b'plain ascii text')
+ self.write_line(b'.')
+ self.assertEqual(self.channel.received_data, b'plain ascii text')
+
+ def test_utf8_data(self):
+ self.write_line(b'HELO example')
+ self.write_line(b'MAIL From:eggs@example')
+ self.write_line(b'RCPT To:spam@example')
+ self.write_line(b'DATA')
+ self.write_line(b'utf8 enriched text: \xc5\xbc\xc5\xba\xc4\x87')
+ self.write_line(b'and some plain ascii')
+ self.write_line(b'.')
+ self.assertEqual(
+ self.channel.received_data,
+ b'utf8 enriched text: \xc5\xbc\xc5\xba\xc4\x87\n'
+ b'and some plain ascii')
+
+
+class SMTPDChannelWithDecodeDataTrue(unittest.TestCase):
+
+ def setUp(self):
+ smtpd.socket = asyncore.socket = mock_socket
+ self.old_debugstream = smtpd.DEBUGSTREAM
+ self.debug = smtpd.DEBUGSTREAM = io.StringIO()
+ self.server = DummyServer('a', 'b')
+ conn, addr = self.server.accept()
+ # Set decode_data to True
+ self.channel = smtpd.SMTPChannel(self.server, conn, addr,
+ decode_data=True)
+
+ def tearDown(self):
+ asyncore.close_all()
+ asyncore.socket = smtpd.socket = socket
+ smtpd.DEBUGSTREAM = self.old_debugstream
+
+ def write_line(self, line):
+ self.channel.socket.queue_recv(line)
+ self.channel.handle_read()
+
+ def test_ascii_data(self):
+ self.write_line(b'HELO example')
+ self.write_line(b'MAIL From:eggs@example')
+ self.write_line(b'RCPT To:spam@example')
+ self.write_line(b'DATA')
+ self.write_line(b'plain ascii text')
+ self.write_line(b'.')
+ self.assertEqual(self.channel.received_data, 'plain ascii text')
+
+ def test_utf8_data(self):
+ self.write_line(b'HELO example')
+ self.write_line(b'MAIL From:eggs@example')
+ self.write_line(b'RCPT To:spam@example')
+ self.write_line(b'DATA')
+ self.write_line(b'utf8 enriched text: \xc5\xbc\xc5\xba\xc4\x87')
+ self.write_line(b'and some plain ascii')
+ self.write_line(b'.')
+ self.assertEqual(
+ self.channel.received_data,
+ 'utf8 enriched text: żźć\nand some plain ascii')
+
+
if __name__ == "__main__":
unittest.main()