Branch merge
diff --git a/Doc/library/curses.rst b/Doc/library/curses.rst
index df25910..d6a35c3 100644
--- a/Doc/library/curses.rst
+++ b/Doc/library/curses.rst
@@ -566,7 +566,7 @@
Instantiate the string *str* with the supplied parameters, where *str* should
be a parameterized string obtained from the terminfo database. E.g.
- ``tparm(tigetstr("cup"), 5, 3)`` could result in ``'\033[6;4H'``, the exact
+ ``tparm(tigetstr("cup"), 5, 3)`` could result in ``b'\033[6;4H'``, the exact
result depending on terminal type.
diff --git a/Include/unicodeobject.h b/Include/unicodeobject.h
index 6e613eb..dc3bad7 100644
--- a/Include/unicodeobject.h
+++ b/Include/unicodeobject.h
@@ -1425,6 +1425,12 @@
(unicode ordinal -> char ordinal) */
const char *errors /* error handling */
);
+PyAPI_FUNC(PyObject*) _PyUnicode_EncodeCharmap(
+ PyObject *unicode, /* Unicode object */
+ PyObject *mapping, /* character mapping
+ (unicode ordinal -> char ordinal) */
+ const char *errors /* error handling */
+ );
#endif
/* Translate a Py_UNICODE buffer of the given length by applying a
diff --git a/Lib/http/server.py b/Lib/http/server.py
index e571418..b79d191 100644
--- a/Lib/http/server.py
+++ b/Lib/http/server.py
@@ -105,6 +105,7 @@
DEFAULT_ERROR_MESSAGE = """\
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
"http://www.w3.org/TR/html4/strict.dtd">
+<html>
<head>
<meta http-equiv="Content-Type" content="text/html;charset=utf-8">
<title>Error response</title>
@@ -734,10 +735,16 @@
list.sort(key=lambda a: a.lower())
r = []
displaypath = html.escape(urllib.parse.unquote(self.path))
- r.append('<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">')
- r.append("<html>\n<title>Directory listing for %s</title>\n" % displaypath)
- r.append("<body>\n<h2>Directory listing for %s</h2>\n" % displaypath)
- r.append("<hr>\n<ul>\n")
+ enc = sys.getfilesystemencoding()
+ title = 'Directory listing for %s' % displaypath
+ r.append('<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" '
+ '"http://www.w3.org/TR/html4/strict.dtd">')
+ r.append('<html>\n<head>')
+ r.append('<meta http-equiv="Content-Type" '
+ 'content="text/html; charset=%s">' % enc)
+ r.append('<title>%s</title>\n</head>' % title)
+ r.append('<body>\n<h1>%s</h1>' % title)
+ r.append('<hr>\n<ul>')
for name in list:
fullname = os.path.join(path, name)
displayname = linkname = name
@@ -748,11 +755,10 @@
if os.path.islink(fullname):
displayname = name + "@"
# Note: a link to a directory displays with @ and links with /
- r.append('<li><a href="%s">%s</a>\n'
+ r.append('<li><a href="%s">%s</a></li>'
% (urllib.parse.quote(linkname), html.escape(displayname)))
- r.append("</ul>\n<hr>\n</body>\n</html>\n")
- enc = sys.getfilesystemencoding()
- encoded = ''.join(r).encode(enc)
+ r.append('</ul>\n<hr>\n</body>\n</html>\n')
+ encoded = '\n'.join(r).encode(enc)
f = io.BytesIO()
f.write(encoded)
f.seek(0)
diff --git a/Lib/test/test_asyncore.py b/Lib/test/test_asyncore.py
index c1b8637..52dff0f 100644
--- a/Lib/test/test_asyncore.py
+++ b/Lib/test/test_asyncore.py
@@ -675,6 +675,7 @@
class TestClient(BaseClient):
def handle_expt(self):
+ self.socket.recv(1024, socket.MSG_OOB)
self.flag = True
class TestHandler(BaseTestHandler):
diff --git a/Lib/test/test_curses.py b/Lib/test/test_curses.py
index b4673e9..ccbbc23 100644
--- a/Lib/test/test_curses.py
+++ b/Lib/test/test_curses.py
@@ -190,7 +190,7 @@
curses.tigetflag('hc')
curses.tigetnum('co')
curses.tigetstr('cr')
- curses.tparm('cr')
+ curses.tparm(b'cr')
curses.typeahead(sys.__stdin__.fileno())
curses.unctrl('a')
curses.ungetch('a')
@@ -280,6 +280,10 @@
if read != ch:
raise AssertionError("%r != %r" % (read, ch))
+def test_issue10570():
+ b = curses.tparm(curses.tigetstr("cup"), 5, 3)
+ assert type(b) is bytes
+
def main(stdscr):
curses.savetty()
try:
@@ -289,6 +293,7 @@
test_resize_term(stdscr)
test_issue6243(stdscr)
test_unget_wch(stdscr)
+ test_issue10570()
finally:
curses.resetty()
diff --git a/Lib/test/test_httpservers.py b/Lib/test/test_httpservers.py
index 1bbaf0e..cc15dd6 100644
--- a/Lib/test/test_httpservers.py
+++ b/Lib/test/test_httpservers.py
@@ -259,8 +259,9 @@
with open(os.path.join(self.tempdir_name, 'index.html'), 'w') as f:
response = self.request('/' + self.tempdir_name + '/')
self.check_status_and_reason(response, 200)
- if os.name == 'posix':
- # chmod won't work as expected on Windows platforms
+ # chmod() doesn't work as expected on Windows, and filesystem
+ # permissions are ignored by root on Unix.
+ if os.name == 'posix' and os.geteuid() != 0:
os.chmod(self.tempdir, 0)
response = self.request(self.tempdir_name + '/')
self.check_status_and_reason(response, 404)
@@ -305,6 +306,9 @@
form.getfirst("bacon")))
"""
+
+@unittest.skipIf(hasattr(os, 'geteuid') and os.geteuid() == 0,
+ "This test can't be run reliably as root (issue #13308).")
class CGIHTTPServerTestCase(BaseTestCase):
class request_handler(NoLogRequestHandler, CGIHTTPRequestHandler):
pass
diff --git a/Misc/ACKS b/Misc/ACKS
index 821a812..65c402c 100644
--- a/Misc/ACKS
+++ b/Misc/ACKS
@@ -919,6 +919,7 @@
Oliver Steele
Greg Stein
Chris Stern
+Alex Stewart
Victor Stinner
Richard Stoakley
Peter Stoehr
diff --git a/Misc/NEWS b/Misc/NEWS
index d58da6a..b46cea8 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -353,10 +353,15 @@
- Byte compilation in packaging is now isolated from the calling Python -B or
-O options, instead of being disallowed under -B or buggy under -O.
+- Issue #10570: curses.tparm() now expects a byte string instead of a Unicode
+ string.
+
+- Issue #13295: http.server now produces valid HTML 4.01 strict.
+
- Issue #2892: preserve iterparse events in case of SyntaxError.
- Issue #13287: urllib.request and urllib.error now contains an __all__
- attribute to expose only public classes and functions. Patch by Florent
+ attribute to expose only relevant classes and functions. Patch by Florent
Xicluna.
- Issue #670664: Fix HTMLParser to correctly handle the content of
diff --git a/Modules/_codecsmodule.c b/Modules/_codecsmodule.c
index be31fd2..c9409cc 100644
--- a/Modules/_codecsmodule.c
+++ b/Modules/_codecsmodule.c
@@ -992,11 +992,7 @@
str = PyUnicode_FromObject(str);
if (str == NULL)
return NULL;
- v = codec_tuple(PyUnicode_EncodeCharmap(
- PyUnicode_AS_UNICODE(str),
- PyUnicode_GET_SIZE(str),
- mapping,
- errors),
+ v = codec_tuple(_PyUnicode_EncodeCharmap(str, mapping, errors),
PyUnicode_GET_SIZE(str));
Py_DECREF(str);
return v;
diff --git a/Modules/_cursesmodule.c b/Modules/_cursesmodule.c
index 8790243..cc42f4b 100644
--- a/Modules/_cursesmodule.c
+++ b/Modules/_cursesmodule.c
@@ -2642,7 +2642,7 @@
PyCursesSetupTermCalled;
- if (!PyArg_ParseTuple(args, "s|iiiiiiiii:tparm",
+ if (!PyArg_ParseTuple(args, "y|iiiiiiiii:tparm",
&fmt, &i1, &i2, &i3, &i4,
&i5, &i6, &i7, &i8, &i9)) {
return NULL;
diff --git a/Modules/fcntlmodule.c b/Modules/fcntlmodule.c
index bfc5985..6b7e3fc 100644
--- a/Modules/fcntlmodule.c
+++ b/Modules/fcntlmodule.c
@@ -540,10 +540,13 @@
if (ins(d, "F_SHLCK", (long)F_SHLCK)) return -1;
#endif
-/* OS X (and maybe others) let you tell the storage device to flush to physical media */
+/* OS X specifics */
#ifdef F_FULLFSYNC
if (ins(d, "F_FULLFSYNC", (long)F_FULLFSYNC)) return -1;
#endif
+#ifdef F_NOCACHE
+ if (ins(d, "F_NOCACHE", (long)F_NOCACHE)) return -1;
+#endif
/* For F_{GET|SET}FL */
#ifdef FD_CLOEXEC
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 0a33ece..73f7926 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -248,7 +248,7 @@
static PyObject *
unicode_encode_call_errorhandler(const char *errors,
PyObject **errorHandler,const char *encoding, const char *reason,
- const Py_UNICODE *unicode, Py_ssize_t size, PyObject **exceptionObject,
+ PyObject *unicode, PyObject **exceptionObject,
Py_ssize_t startpos, Py_ssize_t endpos, Py_ssize_t *newpos);
static void
@@ -4745,8 +4745,7 @@
#endif
rep = unicode_encode_call_errorhandler(
errors, &errorHandler, "utf-8", "surrogates not allowed",
- PyUnicode_AS_UNICODE(unicode), PyUnicode_GET_SIZE(unicode),
- &exc, startpos, startpos+1, &newpos);
+ obj, &exc, startpos, startpos+1, &newpos);
if (!rep)
goto error;
@@ -6450,7 +6449,7 @@
{
if (*exceptionObject == NULL) {
*exceptionObject = PyObject_CallFunction(
- PyExc_UnicodeEncodeError, "sUnns",
+ PyExc_UnicodeEncodeError, "sOnns",
encoding, unicode, startpos, endpos, reason);
}
else {
@@ -6502,12 +6501,12 @@
unicode_encode_call_errorhandler(const char *errors,
PyObject **errorHandler,
const char *encoding, const char *reason,
- const Py_UNICODE *unicode, Py_ssize_t size, PyObject **exceptionObject,
+ PyObject *unicode, PyObject **exceptionObject,
Py_ssize_t startpos, Py_ssize_t endpos,
Py_ssize_t *newpos)
{
static char *argparse = "On;encoding error handler must return (str/bytes, int) tuple";
-
+ Py_ssize_t len;
PyObject *restuple;
PyObject *resunicode;
@@ -6517,8 +6516,12 @@
return NULL;
}
- make_encode_exception(exceptionObject,
- encoding, unicode, size, startpos, endpos, reason);
+ if (PyUnicode_READY(unicode) < 0)
+ return NULL;
+ len = PyUnicode_GET_LENGTH(unicode);
+
+ make_encode_exception_obj(exceptionObject,
+ encoding, unicode, startpos, endpos, reason);
if (*exceptionObject == NULL)
return NULL;
@@ -6542,8 +6545,8 @@
return NULL;
}
if (*newpos<0)
- *newpos = size+*newpos;
- if (*newpos<0 || *newpos>size) {
+ *newpos = len + *newpos;
+ if (*newpos<0 || *newpos>len) {
PyErr_Format(PyExc_IndexError, "position %zd from error handler out of bounds", *newpos);
Py_DECREF(restuple);
return NULL;
@@ -6554,18 +6557,16 @@
}
static PyObject *
-unicode_encode_ucs1(const Py_UNICODE *p,
- Py_ssize_t size,
+unicode_encode_ucs1(PyObject *unicode,
const char *errors,
int limit)
{
+ /* input state */
+ Py_ssize_t pos=0, size;
+ int kind;
+ void *data;
/* output object */
PyObject *res;
- /* pointers to the beginning and end+1 of input */
- const Py_UNICODE *startp = p;
- const Py_UNICODE *endp = p + size;
- /* pointer to the beginning of the unencodable characters */
- /* const Py_UNICODE *badp = NULL; */
/* pointer into the output */
char *str;
/* current output position */
@@ -6578,6 +6579,11 @@
* -1=not initialized, 0=unknown, 1=strict, 2=replace, 3=ignore, 4=xmlcharrefreplace */
int known_errorHandler = -1;
+ if (PyUnicode_READY(unicode) < 0)
+ return NULL;
+ size = PyUnicode_GET_LENGTH(unicode);
+ kind = PyUnicode_KIND(unicode);
+ data = PyUnicode_DATA(unicode);
/* allocate enough for a simple encoding without
replacements, if we need more, we'll resize */
if (size == 0)
@@ -6588,28 +6594,24 @@
str = PyBytes_AS_STRING(res);
ressize = size;
- while (p<endp) {
- Py_UNICODE c = *p;
+ while (pos < size) {
+ Py_UCS4 c = PyUnicode_READ(kind, data, pos);
/* can we encode this? */
if (c<limit) {
/* no overflow check, because we know that the space is enough */
*str++ = (char)c;
- ++p;
+ ++pos;
}
else {
- Py_ssize_t unicodepos = p-startp;
Py_ssize_t requiredsize;
PyObject *repunicode;
- Py_ssize_t repsize;
- Py_ssize_t newpos;
- Py_ssize_t respos;
- Py_UNICODE *uni2;
+ Py_ssize_t repsize, newpos, respos, i;
/* startpos for collecting unencodable chars */
- const Py_UNICODE *collstart = p;
- const Py_UNICODE *collend = p;
+ Py_ssize_t collstart = pos;
+ Py_ssize_t collend = pos;
/* find all unecodable characters */
- while ((collend < endp) && ((*collend)>=limit))
+ while ((collend < size) && (PyUnicode_READ(kind, data, collend)>=limit))
++collend;
/* cache callback name lookup (if not done yet, i.e. it's the first error) */
if (known_errorHandler==-1) {
@@ -6626,39 +6628,35 @@
}
switch (known_errorHandler) {
case 1: /* strict */
- raise_encode_exception(&exc, encoding, startp, size, collstart-startp, collend-startp, reason);
+ raise_encode_exception_obj(&exc, encoding, unicode, collstart, collend, reason);
goto onError;
case 2: /* replace */
while (collstart++<collend)
*str++ = '?'; /* fall through */
case 3: /* ignore */
- p = collend;
+ pos = collend;
break;
case 4: /* xmlcharrefreplace */
respos = str - PyBytes_AS_STRING(res);
- /* determine replacement size (temporarily (mis)uses p) */
- for (p = collstart, repsize = 0; p < collend; ++p) {
- if (*p<10)
+ /* determine replacement size; code points read from the string can need up to 7 decimal digits regardless of Py_UNICODE width */
+ for (i = collstart, repsize = 0; i < collend; ++i) {
+ Py_UCS4 ch = PyUnicode_READ(kind, data, i);
+ if (ch < 10)
repsize += 2+1+1;
- else if (*p<100)
+ else if (ch < 100)
repsize += 2+2+1;
- else if (*p<1000)
+ else if (ch < 1000)
repsize += 2+3+1;
- else if (*p<10000)
+ else if (ch < 10000)
repsize += 2+4+1;
-#ifndef Py_UNICODE_WIDE
- else
- repsize += 2+5+1;
-#else
- else if (*p<100000)
+ else if (ch < 100000)
repsize += 2+5+1;
- else if (*p<1000000)
+ else if (ch < 1000000)
repsize += 2+6+1;
else
repsize += 2+7+1;
-#endif
}
- requiredsize = respos+repsize+(endp-collend);
+ requiredsize = respos+repsize+(size-collend);
if (requiredsize > ressize) {
if (requiredsize<2*ressize)
requiredsize = 2*ressize;
@@ -6667,17 +6670,22 @@
str = PyBytes_AS_STRING(res) + respos;
ressize = requiredsize;
}
- /* generate replacement (temporarily (mis)uses p) */
- for (p = collstart; p < collend; ++p) {
- str += sprintf(str, "&#%d;", (int)*p);
+ /* generate replacement */
+ for (i = collstart; i < collend; ++i) {
+ str += sprintf(str, "&#%d;", (int)PyUnicode_READ(kind, data, i));
}
- p = collend;
+ pos = collend;
break;
default:
repunicode = unicode_encode_call_errorhandler(errors, &errorHandler,
- encoding, reason, startp, size, &exc,
- collstart-startp, collend-startp, &newpos);
+ encoding, reason, unicode, &exc,
+ collstart, collend, &newpos);
if (repunicode == NULL)
goto onError;
+ if (PyUnicode_Check(repunicode) && PyUnicode_READY(repunicode) < 0) {
+ /* repunicode is an owned reference; release it before bailing out */
+ Py_DECREF(repunicode);
+ goto onError;
+ }
if (PyBytes_Check(repunicode)) {
/* Directly copy bytes result to output. */
@@ -6694,7 +6698,7 @@
}
memcpy(str, PyBytes_AsString(repunicode), repsize);
str += repsize;
- p = startp + newpos;
+ pos = newpos;
Py_DECREF(repunicode);
break;
}
@@ -6702,8 +6706,8 @@
have+the replacement+the rest of the string, so
we won't have to check space for encodable characters) */
respos = str - PyBytes_AS_STRING(res);
- repsize = PyUnicode_GET_SIZE(repunicode);
- requiredsize = respos+repsize+(endp-collend);
+ repsize = PyUnicode_GET_LENGTH(repunicode);
+ requiredsize = respos+repsize+(size-collend);
if (requiredsize > ressize) {
if (requiredsize<2*ressize)
requiredsize = 2*ressize;
@@ -6716,17 +6720,17 @@
}
/* check if there is anything unencodable in the replacement
and copy it to the output */
- for (uni2 = PyUnicode_AS_UNICODE(repunicode);repsize-->0; ++uni2, ++str) {
- c = *uni2;
+ for (i = 0; repsize-->0; ++i, ++str) {
+ c = PyUnicode_READ_CHAR(repunicode, i);
if (c >= limit) {
- raise_encode_exception(&exc, encoding, startp, size,
- unicodepos, unicodepos+1, reason);
+ raise_encode_exception_obj(&exc, encoding, unicode,
+ pos, pos+1, reason);
Py_DECREF(repunicode);
goto onError;
}
*str = (char)c;
}
- p = startp + newpos;
+ pos = newpos;
Py_DECREF(repunicode);
}
}
@@ -6750,12 +6754,19 @@
return NULL;
}
+/* Deprecated */
PyObject *
PyUnicode_EncodeLatin1(const Py_UNICODE *p,
Py_ssize_t size,
const char *errors)
{
- return unicode_encode_ucs1(p, size, errors, 256);
+ PyObject *result;
+ PyObject *unicode = PyUnicode_FromUnicode(p, size);
+ if (unicode == NULL)
+ return NULL;
+ result = unicode_encode_ucs1(unicode, errors, 256);
+ Py_DECREF(unicode);
+ return result;
}
PyObject *
@@ -6774,9 +6785,7 @@
PyUnicode_GET_LENGTH(unicode));
/* Non-Latin-1 characters present. Defer to above function to
raise the exception. */
- return PyUnicode_EncodeLatin1(PyUnicode_AS_UNICODE(unicode),
- PyUnicode_GET_SIZE(unicode),
- errors);
+ return unicode_encode_ucs1(unicode, errors, 256);
}
PyObject*
@@ -6888,12 +6897,19 @@
return NULL;
}
+/* Deprecated */
PyObject *
PyUnicode_EncodeASCII(const Py_UNICODE *p,
Py_ssize_t size,
const char *errors)
{
- return unicode_encode_ucs1(p, size, errors, 128);
+ PyObject *result;
+ PyObject *unicode = PyUnicode_FromUnicode(p, size);
+ if (unicode == NULL)
+ return NULL;
+ result = unicode_encode_ucs1(unicode, errors, 128);
+ Py_DECREF(unicode);
+ return result;
}
PyObject *
@@ -6910,9 +6926,7 @@
if (PyUnicode_MAX_CHAR_VALUE(unicode) < 128)
return PyBytes_FromStringAndSize(PyUnicode_DATA(unicode),
PyUnicode_GET_LENGTH(unicode));
- return PyUnicode_EncodeASCII(PyUnicode_AS_UNICODE(unicode),
- PyUnicode_GET_SIZE(unicode),
- errors);
+ return unicode_encode_ucs1(unicode, errors, 128);
}
PyObject *
@@ -8182,13 +8196,13 @@
Return 0 on success, -1 on error */
static int
charmap_encoding_error(
- const Py_UNICODE *p, Py_ssize_t size, Py_ssize_t *inpos, PyObject *mapping,
+ PyObject *unicode, Py_ssize_t *inpos, PyObject *mapping,
PyObject **exceptionObject,
int *known_errorHandler, PyObject **errorHandler, const char *errors,
PyObject **res, Py_ssize_t *respos)
{
PyObject *repunicode = NULL; /* initialize to prevent gcc warning */
- Py_ssize_t repsize;
+ Py_ssize_t size, repsize;
Py_ssize_t newpos;
Py_UNICODE *uni2;
/* startpos for collecting unencodable chars */
@@ -8198,19 +8212,26 @@
char *encoding = "charmap";
char *reason = "character maps to <undefined>";
charmapencode_result x;
+ Py_UCS4 ch;
+ int val;
+ if (PyUnicode_READY(unicode) < 0)
+ return -1;
+ size = PyUnicode_GET_LENGTH(unicode);
/* find all unencodable characters */
while (collendpos < size) {
PyObject *rep;
if (Py_TYPE(mapping) == &EncodingMapType) {
- int res = encoding_map_lookup(p[collendpos], mapping);
- if (res != -1)
+ ch = PyUnicode_READ_CHAR(unicode, collendpos);
+ val = encoding_map_lookup(ch, mapping);
+ if (val != -1)
break;
++collendpos;
continue;
}
- rep = charmapencode_lookup(p[collendpos], mapping);
+ ch = PyUnicode_READ_CHAR(unicode, collendpos);
+ rep = charmapencode_lookup(ch, mapping);
if (rep==NULL)
return -1;
else if (rep!=Py_None) {
@@ -8236,7 +8257,7 @@
}
switch (*known_errorHandler) {
case 1: /* strict */
- raise_encode_exception(exceptionObject, encoding, p, size, collstartpos, collendpos, reason);
+ raise_encode_exception_obj(exceptionObject, encoding, unicode, collstartpos, collendpos, reason);
return -1;
case 2: /* replace */
for (collpos = collstartpos; collpos<collendpos; ++collpos) {
@@ -8245,7 +8266,7 @@
return -1;
}
else if (x==enc_FAILED) {
- raise_encode_exception(exceptionObject, encoding, p, size, collstartpos, collendpos, reason);
+ raise_encode_exception_obj(exceptionObject, encoding, unicode, collstartpos, collendpos, reason);
return -1;
}
}
@@ -8258,13 +8279,13 @@
for (collpos = collstartpos; collpos < collendpos; ++collpos) {
char buffer[2+29+1+1];
char *cp;
- sprintf(buffer, "&#%d;", (int)p[collpos]);
+ sprintf(buffer, "&#%d;", (int)PyUnicode_READ_CHAR(unicode, collpos));
for (cp = buffer; *cp; ++cp) {
x = charmapencode_output(*cp, mapping, res, respos);
if (x==enc_EXCEPTION)
return -1;
else if (x==enc_FAILED) {
- raise_encode_exception(exceptionObject, encoding, p, size, collstartpos, collendpos, reason);
+ raise_encode_exception_obj(exceptionObject, encoding, unicode, collstartpos, collendpos, reason);
return -1;
}
}
@@ -8273,7 +8294,7 @@
break;
default:
repunicode = unicode_encode_call_errorhandler(errors, errorHandler,
- encoding, reason, p, size, exceptionObject,
+ encoding, reason, unicode, exceptionObject,
collstartpos, collendpos, &newpos);
if (repunicode == NULL)
return -1;
@@ -8305,7 +8326,7 @@
}
else if (x==enc_FAILED) {
Py_DECREF(repunicode);
- raise_encode_exception(exceptionObject, encoding, p, size, collstartpos, collendpos, reason);
+ raise_encode_exception_obj(exceptionObject, encoding, unicode, collstartpos, collendpos, reason);
return -1;
}
}
@@ -8316,15 +8337,15 @@
}
PyObject *
-PyUnicode_EncodeCharmap(const Py_UNICODE *p,
- Py_ssize_t size,
- PyObject *mapping,
- const char *errors)
+_PyUnicode_EncodeCharmap(PyObject *unicode,
+ PyObject *mapping,
+ const char *errors)
{
/* output object */
PyObject *res = NULL;
/* current input position */
Py_ssize_t inpos = 0;
+ Py_ssize_t size;
/* current output position */
Py_ssize_t respos = 0;
PyObject *errorHandler = NULL;
@@ -8334,9 +8355,13 @@
* 3=ignore, 4=xmlcharrefreplace */
int known_errorHandler = -1;
+ if (PyUnicode_READY(unicode) < 0)
+ return NULL;
+ size = PyUnicode_GET_LENGTH(unicode);
+
/* Default to Latin-1 */
if (mapping == NULL)
- return PyUnicode_EncodeLatin1(p, size, errors);
+ return unicode_encode_ucs1(unicode, errors, 256);
/* allocate enough for a simple encoding without
replacements, if we need more, we'll resize */
@@ -8347,12 +8372,13 @@
return res;
while (inpos<size) {
+ Py_UCS4 ch = PyUnicode_READ_CHAR(unicode, inpos);
/* try to encode it */
- charmapencode_result x = charmapencode_output(p[inpos], mapping, &res, &respos);
+ charmapencode_result x = charmapencode_output(ch, mapping, &res, &respos);
if (x==enc_EXCEPTION) /* error */
goto onError;
if (x==enc_FAILED) { /* unencodable character */
- if (charmap_encoding_error(p, size, &inpos, mapping,
+ if (charmap_encoding_error(unicode, &inpos, mapping,
&exc,
&known_errorHandler, &errorHandler, errors,
&res, &respos)) {
@@ -8380,6 +8406,22 @@
return NULL;
}
+/* Deprecated: wraps the Py_UNICODE buffer in a temporary str object and forwards to _PyUnicode_EncodeCharmap() */
+PyObject *
+PyUnicode_EncodeCharmap(const Py_UNICODE *p,
+ Py_ssize_t size,
+ PyObject *mapping,
+ const char *errors)
+{
+ PyObject *result;
+ PyObject *unicode = PyUnicode_FromUnicode(p, size);
+ if (unicode == NULL)
+ return NULL;
+ result = _PyUnicode_EncodeCharmap(unicode, mapping, errors);
+ Py_DECREF(unicode);
+ return result;
+}
+
PyObject *
PyUnicode_AsCharmapString(PyObject *unicode,
PyObject *mapping)
@@ -8388,10 +8430,7 @@
PyErr_BadArgument();
return NULL;
}
- return PyUnicode_EncodeCharmap(PyUnicode_AS_UNICODE(unicode),
- PyUnicode_GET_SIZE(unicode),
- mapping,
- NULL);
+ return _PyUnicode_EncodeCharmap(unicode, mapping, NULL);
}
/* create or adjust a UnicodeTranslateError */
@@ -8893,6 +8932,7 @@
Py_UNICODE *p, *end;
PyObject *errorHandler = NULL;
PyObject *exc = NULL;
+ PyObject *unicode;
const char *encoding = "decimal";
const char *reason = "invalid decimal Unicode string";
/* the following variable is used for caching string comparisons
@@ -8973,9 +9013,13 @@
p = collend;
break;
default:
+ unicode = PyUnicode_FromUnicode(s, length);
+ if (unicode == NULL)
+ goto onError;
repunicode = unicode_encode_call_errorhandler(errors, &errorHandler,
- encoding, reason, s, length, &exc,
+ encoding, reason, unicode, &exc,
collstart-s, collend-s, &newpos);
+ Py_DECREF(unicode);
if (repunicode == NULL)
goto onError;
if (!PyUnicode_Check(repunicode)) {