Fix some string encoding issues with entity bodies in HTTP requests.
RFC 2616 says that iso-8859-1 is the default charset for HTTP entity
bodies, but we encoded strings using ascii. See
http://bugs.python.org/issue5314. Changed docs and code to use
iso-8859-1.
Also fix passing a file as the body instead of a string: a file opened
in text mode is now encoded as iso-8859-1 before it is sent.
Add tests to show that some of this behavior actually works.
diff --git a/Lib/http/client.py b/Lib/http/client.py
index 5e091b8..0ea15ab 100644
--- a/Lib/http/client.py
+++ b/Lib/http/client.py
@@ -243,7 +243,6 @@
if line in (b'\r\n', b'\n', b''):
break
hstring = b''.join(headers).decode('iso-8859-1')
-
return email.parser.Parser(_class=_class).parsestr(hstring)
class HTTPResponse(io.RawIOBase):
@@ -675,13 +674,22 @@
if self.debuglevel > 0:
print("send:", repr(str))
try:
- blocksize=8192
- if hasattr(str,'read') :
- if self.debuglevel > 0: print("sendIng a read()able")
- data=str.read(blocksize)
- while data:
+ blocksize = 8192
+ if hasattr(str, "read") :
+ if self.debuglevel > 0:
+ print("sendIng a read()able")
+ encode = False
+ if "b" not in str.mode:
+ encode = True
+ if self.debuglevel > 0:
+ print("encoding file using iso-8859-1")
+ while 1:
+ data = str.read(blocksize)
+ if not data:
+ break
+ if encode:
+ data = data.encode("iso-8859-1")
self.sock.sendall(data)
- data=str.read(blocksize)
else:
self.sock.sendall(str)
except socket.error as v:
@@ -713,8 +721,8 @@
message_body = None
self.send(msg)
if message_body is not None:
- #message_body was not a string (i.e. it is a file) and
- #we must run the risk of Nagle
+ # message_body was not a string (i.e. it is a file), and
+ # we must run the risk of Nagle.
self.send(message_body)
def putrequest(self, method, url, skip_host=0, skip_accept_encoding=0):
@@ -904,7 +912,9 @@
for hdr, value in headers.items():
self.putheader(hdr, value)
if isinstance(body, str):
- body = body.encode('ascii')
+ # RFC 2616 Section 3.7.1 says that text media types have a
+ # default charset of iso-8859-1.
+ body = body.encode('iso-8859-1')
self.endheaders(body)
def getresponse(self):
diff --git a/Lib/test/test_httplib.py b/Lib/test/test_httplib.py
index a433474..fd5c123 100644
--- a/Lib/test/test_httplib.py
+++ b/Lib/test/test_httplib.py
@@ -272,9 +272,80 @@
h = httplib.HTTPSConnection(HOST, TimeoutTest.PORT, timeout=30)
self.assertEqual(h.timeout, 30)
+class RequestBodyTest(TestCase):
+ """Test cases where a request includes a message body."""
+
+ def setUp(self):
+ self.conn = httplib.HTTPConnection('example.com')
+ self.sock = FakeSocket("")
+ self.conn.sock = self.sock
+
+ def get_headers_and_fp(self):
+ f = io.BytesIO(self.sock.data)
+ f.readline() # read the request line
+ message = httplib.parse_headers(f)
+ return message, f
+
+ def test_manual_content_length(self):
+ # Set an incorrect content-length so that we can verify that
+ # it will not be overridden by the library.
+ self.conn.request("PUT", "/url", "body",
+ {"Content-Length": "42"})
+ message, f = self.get_headers_and_fp()
+ self.assertEqual("42", message.get("content-length"))
+ self.assertEqual(4, len(f.read()))
+
+ def test_ascii_body(self):
+ self.conn.request("PUT", "/url", "body")
+ message, f = self.get_headers_and_fp()
+ self.assertEqual("text/plain", message.get_content_type())
+ self.assertEqual(None, message.get_charset())
+ self.assertEqual("4", message.get("content-length"))
+ self.assertEqual(b'body', f.read())
+
+ def test_latin1_body(self):
+ self.conn.request("PUT", "/url", "body\xc1")
+ message, f = self.get_headers_and_fp()
+ self.assertEqual("text/plain", message.get_content_type())
+ self.assertEqual(None, message.get_charset())
+ self.assertEqual("5", message.get("content-length"))
+ self.assertEqual(b'body\xc1', f.read())
+
+ def test_bytes_body(self):
+ self.conn.request("PUT", "/url", b"body\xc1")
+ message, f = self.get_headers_and_fp()
+ self.assertEqual("text/plain", message.get_content_type())
+ self.assertEqual(None, message.get_charset())
+ self.assertEqual("5", message.get("content-length"))
+ self.assertEqual(b'body\xc1', f.read())
+
+ def test_file_body(self):
+ f = open(support.TESTFN, "w")
+ f.write("body")
+ f.close()
+ f = open(support.TESTFN)
+ self.conn.request("PUT", "/url", f)
+ message, f = self.get_headers_and_fp()
+ self.assertEqual("text/plain", message.get_content_type())
+ self.assertEqual(None, message.get_charset())
+ self.assertEqual("4", message.get("content-length"))
+ self.assertEqual(b'body', f.read())
+
+ def test_binary_file_body(self):
+ f = open(support.TESTFN, "wb")
+ f.write(b"body\xc1")
+ f.close()
+ f = open(support.TESTFN, "rb")
+ self.conn.request("PUT", "/url", f)
+ message, f = self.get_headers_and_fp()
+ self.assertEqual("text/plain", message.get_content_type())
+ self.assertEqual(None, message.get_charset())
+ self.assertEqual("5", message.get("content-length"))
+ self.assertEqual(b'body\xc1', f.read())
+
def test_main(verbose=None):
support.run_unittest(HeaderTests, OfflineTest, BasicTest, TimeoutTest,
- HTTPSTimeoutTest)
+ HTTPSTimeoutTest, RequestBodyTest)
if __name__ == '__main__':
test_main()