Issue #28774: Fix start/end pos in unicode_encode_ucs1().
Fix the error position reported in the UnicodeEncodeError raised by the
ASCII and Latin1 encoders when a string returned by the error handler
contains multiple non-encodable characters (non-ASCII for the ASCII codec,
characters outside the U+0000-U+00FF range for Latin1).
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index e88a126..2bf48b7 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -6798,7 +6798,7 @@
goto onError;
/* subtract preallocated bytes */
- writer.min_size -= 1;
+ writer.min_size -= newpos - collstart;
if (PyBytes_Check(rep)) {
/* Directly copy bytes result to output. */
@@ -6835,7 +6835,7 @@
ch = PyUnicode_READ_CHAR(rep, i);
if (ch >= limit) {
raise_encode_exception(&exc, encoding, unicode,
- pos, pos+1, reason);
+ collstart, collend, reason);
goto onError;
}
*str = (char)ch;