Issue #23055: Fixed a buffer overflow in PyUnicode_FromFormatV. Analysis and fix by Guido Vranken.

commit: 0e0282eb144ed69616a2a7e0797cf31d58ab2276 [log] [tgz]
author: Serhiy Storchaka <storchaka@gmail.com> Tue Jan 27 22:17:56 2015 +0200
committer: Serhiy Storchaka <storchaka@gmail.com> Tue Jan 27 22:17:56 2015 +0200
tree: 8ef3f2b0736e22ca6d05aec526ca1e65b28bcb83
parent: 3ce465ab56568d5d53b1985cbb3ebe5961bfbf18 [diff]
diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py
index f2018ec..625d08c 100644
--- a/Lib/test/test_unicode.py
+++ b/Lib/test/test_unicode.py

@@ -1700,6 +1700,9 @@
         if sys.maxunicode > 0xffff:
             check_format(u'\U0010ffff',
                          b'%c', c_int(0x10ffff))
+        else:
+            with self.assertRaises(OverflowError):
+                PyUnicode_FromFormat(b'%c', c_int(0x10000))
         with self.assertRaises(OverflowError):
             PyUnicode_FromFormat(b'%c', c_int(0x110000))
         # Issue #18183
@@ -1750,8 +1753,45 @@
                      b'%zu', c_size_t(123))
 
         # test long output
+        min_long = -(2 ** (8 * sizeof(c_long) - 1))
+        max_long = -min_long - 1
+        check_format(unicode(min_long),
+                     b'%ld', c_long(min_long))
+        check_format(unicode(max_long),
+                     b'%ld', c_long(max_long))
+        max_ulong = 2 ** (8 * sizeof(c_ulong)) - 1
+        check_format(unicode(max_ulong),
+                     b'%lu', c_ulong(max_ulong))
         PyUnicode_FromFormat(b'%p', c_void_p(-1))
 
+        # test padding (width and/or precision)
+        check_format(u'123'.rjust(10, u'0'),
+                     b'%010i', c_int(123))
+        check_format(u'123'.rjust(100),
+                     b'%100i', c_int(123))
+        check_format(u'123'.rjust(100, u'0'),
+                     b'%.100i', c_int(123))
+        check_format(u'123'.rjust(80, u'0').rjust(100),
+                     b'%100.80i', c_int(123))
+
+        check_format(u'123'.rjust(10, u'0'),
+                     b'%010u', c_uint(123))
+        check_format(u'123'.rjust(100),
+                     b'%100u', c_uint(123))
+        check_format(u'123'.rjust(100, u'0'),
+                     b'%.100u', c_uint(123))
+        check_format(u'123'.rjust(80, u'0').rjust(100),
+                     b'%100.80u', c_uint(123))
+
+        check_format(u'123'.rjust(10, u'0'),
+                     b'%010x', c_int(0x123))
+        check_format(u'123'.rjust(100),
+                     b'%100x', c_int(0x123))
+        check_format(u'123'.rjust(100, u'0'),
+                     b'%.100x', c_int(0x123))
+        check_format(u'123'.rjust(80, u'0').rjust(100),
+                     b'%100.80x', c_int(0x123))
+
         # test %V
         check_format(u'repr=abc',
                      b'repr=%V', u'abc', b'xyz')

diff --git a/Misc/NEWS b/Misc/NEWS
index 593b70b..b00193e 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS

@@ -10,6 +10,9 @@
 Core and Builtins
 -----------------
 
+- Issue #23055: Fixed a buffer overflow in PyUnicode_FromFormatV.  Analysis
+  and fix by Guido Vranken.
+
 - Issue #23048: Fix jumping out of an infinite while loop in the pdb.
 
 Library

diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 340f8cc..2e5f5fd 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c

@@ -735,15 +735,10 @@
       * objects once during step 3 and put the result in an array) */
     for (f = format; *f; f++) {
          if (*f == '%') {
-             if (*(f+1)=='%')
-                 continue;
-             if (*(f+1)=='S' || *(f+1)=='R')
-                 ++callcount;
-             while (isdigit((unsigned)*f))
-                 width = (width*10) + *f++ - '0';
-             while (*++f && *f != '%' && !isalpha((unsigned)*f))
-                 ;
-             if (*f == 's')
+             f++;
+             while (*f && *f != '%' && !isalpha((unsigned)*f))
+                 f++;
+             if (*f == 's' || *f=='S' || *f=='R')
                  ++callcount;
          }
     }
@@ -760,12 +755,16 @@
     /* step 3: figure out how large a buffer we need */
     for (f = format; *f; f++) {
         if (*f == '%') {
-            const char* p = f;
+            const char* p = f++;
             width = 0;
             while (isdigit((unsigned)*f))
                 width = (width*10) + *f++ - '0';
-            while (*++f && *f != '%' && !isalpha((unsigned)*f))
-                ;
+            precision = 0;
+            if (*f == '.') {
+                f++;
+                while (isdigit((unsigned)*f))
+                    precision = (precision*10) + *f++ - '0';
+            }
 
             /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
              * they don't affect the amount of space we reserve.
@@ -800,6 +799,8 @@
                 break;
             case 'd': case 'u': case 'i': case 'x':
                 (void) va_arg(count, int);
+                if (width < precision)
+                    width = precision;
                 /* 20 bytes is enough to hold a 64-bit
                    integer.  Decimal takes the most space.
                    This isn't enough for octal.
commit: 0e0282eb144ed69616a2a7e0797cf31d58ab2276 [log] [tgz]
author: Serhiy Storchaka <storchaka@gmail.com> Tue Jan 27 22:17:56 2015 +0200
committer: Serhiy Storchaka <storchaka@gmail.com> Tue Jan 27 22:17:56 2015 +0200
tree: 8ef3f2b0736e22ca6d05aec526ca1e65b28bcb83
parent: 3ce465ab56568d5d53b1985cbb3ebe5961bfbf18 [diff]