Optimize backslashreplace error handler
Issue #25318: Optimize the backslashreplace and xmlcharrefreplace error handlers
in the UTF-8 encoder. Also optimize the backslashreplace error handler for the
ASCII and Latin1 encoders.
Use the new _PyBytesWriter API to optimize these error handlers for the
encoders. This avoids creating an exception and calling the slow implementation
of the error handler.
diff --git a/Objects/stringlib/codecs.h b/Objects/stringlib/codecs.h
index d7a9918..ae99d1a 100644
--- a/Objects/stringlib/codecs.h
+++ b/Objects/stringlib/codecs.h
@@ -334,7 +334,6 @@
i += (endpos - startpos - 1);
break;
-
case _Py_ERROR_SURROGATEPASS:
for (k=startpos; k<endpos; k++) {
ch = data[k];
@@ -345,6 +344,22 @@
i += (endpos - startpos - 1);
break;
+ case _Py_ERROR_BACKSLASHREPLACE:
+ p = backslashreplace(&writer, max_char_size, p,
+ unicode, startpos, endpos);
+ if (p == NULL)
+ goto error;
+ i += (endpos - startpos - 1);
+ break;
+
+ case _Py_ERROR_XMLCHARREFREPLACE:
+ p = xmlcharrefreplace(&writer, max_char_size, p,
+ unicode, startpos, endpos);
+ if (p == NULL)
+ goto error;
+ i += (endpos - startpos - 1);
+ break;
+
case _Py_ERROR_SURROGATEESCAPE:
for (k=startpos; k<endpos; k++) {
ch = data[k];
@@ -359,7 +374,6 @@
startpos = k;
assert(startpos < endpos);
/* fall through the default handler */
-
default:
rep = unicode_encode_call_errorhandler(
errors, &error_handler_obj, "utf-8", "surrogates not allowed",