Fix distutils’ check and register Unicode handling (#13114).
The check command was fixed by Kirill Kuzminykh.
The register command was using StringIO.getvalue, which uses “''.join”
and thus implicitly converts mixed str and unicode chunks using the default
encoding (ASCII), so I changed the code to collect the chunks in an
intermediary list, explicitly encode the unicode ones to UTF-8, and join.
diff --git a/Lib/distutils/command/check.py b/Lib/distutils/command/check.py
index bc29baa..4b64e45 100644
--- a/Lib/distutils/command/check.py
+++ b/Lib/distutils/command/check.py
@@ -5,6 +5,7 @@
__revision__ = "$Id$"
from distutils.core import Command
+from distutils.dist import PKG_INFO_ENCODING
from distutils.errors import DistutilsSetupError
try:
@@ -108,6 +109,8 @@
def check_restructuredtext(self):
"""Checks if the long string fields are reST-compliant."""
data = self.distribution.get_long_description()
+ if not isinstance(data, unicode):
+ data = data.decode(PKG_INFO_ENCODING)
for warning in self._check_rst_data(data):
line = warning[-1].get('line')
if line is None:
diff --git a/Lib/distutils/command/register.py b/Lib/distutils/command/register.py
index dc08990..edb42b9 100644
--- a/Lib/distutils/command/register.py
+++ b/Lib/distutils/command/register.py
@@ -10,7 +10,6 @@
import urllib2
import getpass
import urlparse
-import StringIO
from warnings import warn
from distutils.core import PyPIRCCommand
@@ -260,21 +259,30 @@
boundary = '--------------GHSKFJDLGDS7543FJKLFHRE75642756743254'
sep_boundary = '\n--' + boundary
end_boundary = sep_boundary + '--'
- body = StringIO.StringIO()
+ chunks = []
for key, value in data.items():
# handle multiple entries for the same name
if type(value) not in (type([]), type( () )):
value = [value]
for value in value:
- body.write(sep_boundary)
- body.write('\nContent-Disposition: form-data; name="%s"'%key)
- body.write("\n\n")
- body.write(value)
+ chunks.append(sep_boundary)
+ chunks.append('\nContent-Disposition: form-data; name="%s"'%key)
+ chunks.append("\n\n")
+ chunks.append(value)
if value and value[-1] == '\r':
- body.write('\n') # write an extra newline (lurve Macs)
- body.write(end_boundary)
- body.write("\n")
- body = body.getvalue()
+ chunks.append('\n') # write an extra newline (lurve Macs)
+ chunks.append(end_boundary)
+ chunks.append("\n")
+
+ # chunks may be bytes (str) or unicode objects that we need to encode
+ body = []
+ for chunk in chunks:
+ if isinstance(chunk, unicode):
+ body.append(chunk.encode('utf-8'))
+ else:
+ body.append(chunk)
+
+ body = ''.join(body)
# build the Request
headers = {