Issue #23055: Fixed a buffer overflow in PyUnicode_FromFormatV. Analysis
and fix by Guido Vranken.
diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py
index f2018ec..625d08c 100644
--- a/Lib/test/test_unicode.py
+++ b/Lib/test/test_unicode.py
@@ -1700,6 +1700,9 @@
if sys.maxunicode > 0xffff:
check_format(u'\U0010ffff',
b'%c', c_int(0x10ffff))
+ else:
+ with self.assertRaises(OverflowError):
+ PyUnicode_FromFormat(b'%c', c_int(0x10000))
with self.assertRaises(OverflowError):
PyUnicode_FromFormat(b'%c', c_int(0x110000))
# Issue #18183
@@ -1750,8 +1753,45 @@
b'%zu', c_size_t(123))
# test long output
+ min_long = -(2 ** (8 * sizeof(c_long) - 1))
+ max_long = -min_long - 1
+ check_format(unicode(min_long),
+ b'%ld', c_long(min_long))
+ check_format(unicode(max_long),
+ b'%ld', c_long(max_long))
+ max_ulong = 2 ** (8 * sizeof(c_ulong)) - 1
+ check_format(unicode(max_ulong),
+ b'%lu', c_ulong(max_ulong))
PyUnicode_FromFormat(b'%p', c_void_p(-1))
+ # test padding (width and/or precision)
+ check_format(u'123'.rjust(10, u'0'),
+ b'%010i', c_int(123))
+ check_format(u'123'.rjust(100),
+ b'%100i', c_int(123))
+ check_format(u'123'.rjust(100, u'0'),
+ b'%.100i', c_int(123))
+ check_format(u'123'.rjust(80, u'0').rjust(100),
+ b'%100.80i', c_int(123))
+
+ check_format(u'123'.rjust(10, u'0'),
+ b'%010u', c_uint(123))
+ check_format(u'123'.rjust(100),
+ b'%100u', c_uint(123))
+ check_format(u'123'.rjust(100, u'0'),
+ b'%.100u', c_uint(123))
+ check_format(u'123'.rjust(80, u'0').rjust(100),
+ b'%100.80u', c_uint(123))
+
+ check_format(u'123'.rjust(10, u'0'),
+ b'%010x', c_int(0x123))
+ check_format(u'123'.rjust(100),
+ b'%100x', c_int(0x123))
+ check_format(u'123'.rjust(100, u'0'),
+ b'%.100x', c_int(0x123))
+ check_format(u'123'.rjust(80, u'0').rjust(100),
+ b'%100.80x', c_int(0x123))
+
# test %V
check_format(u'repr=abc',
b'repr=%V', u'abc', b'xyz')
diff --git a/Misc/NEWS b/Misc/NEWS
index 593b70b..b00193e 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -10,6 +10,9 @@
Core and Builtins
-----------------
+- Issue #23055: Fixed a buffer overflow in PyUnicode_FromFormatV. Analysis
+ and fix by Guido Vranken.
+
- Issue #23048: Fix jumping out of an infinite while loop in the pdb.
Library
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 340f8cc..2e5f5fd 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -735,15 +735,10 @@
* objects once during step 3 and put the result in an array) */
for (f = format; *f; f++) {
if (*f == '%') {
- if (*(f+1)=='%')
- continue;
- if (*(f+1)=='S' || *(f+1)=='R')
- ++callcount;
- while (isdigit((unsigned)*f))
- width = (width*10) + *f++ - '0';
- while (*++f && *f != '%' && !isalpha((unsigned)*f))
- ;
- if (*f == 's')
+ f++;
+ while (*f && *f != '%' && !isalpha((unsigned)*f))
+ f++;
+ if (*f == 's' || *f=='S' || *f=='R')
++callcount;
}
}
@@ -760,12 +755,16 @@
/* step 3: figure out how large a buffer we need */
for (f = format; *f; f++) {
if (*f == '%') {
- const char* p = f;
+ const char* p = f++;
width = 0;
while (isdigit((unsigned)*f))
width = (width*10) + *f++ - '0';
- while (*++f && *f != '%' && !isalpha((unsigned)*f))
- ;
+ precision = 0;
+ if (*f == '.') {
+ f++;
+ while (isdigit((unsigned)*f))
+ precision = (precision*10) + *f++ - '0';
+ }
/* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
* they don't affect the amount of space we reserve.
@@ -800,6 +799,8 @@
break;
case 'd': case 'u': case 'i': case 'x':
(void) va_arg(count, int);
+ if (width < precision)
+ width = precision;
/* 20 bytes is enough to hold a 64-bit
integer. Decimal takes the most space.
This isn't enough for octal.