/* implements the unicode (as opposed to string) version of the
   built-in formatters for string, int, float.  that is, the versions
   of int.__format__, etc., that take and return unicode objects */
 | 4 |  | 
 | 5 | #include "Python.h" | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 6 | #include <locale.h> | 
 | 7 |  | 
 | 8 | /* Raises an exception about an unknown presentation type for this | 
 | 9 |  * type. */ | 
 | 10 |  | 
 | 11 | static void | 
 | 12 | unknown_presentation_type(Py_UCS4 presentation_type, | 
 | 13 |                           const char* type_name) | 
 | 14 | { | 
 | 15 |     /* %c might be out-of-range, hence the two cases. */ | 
 | 16 |     if (presentation_type > 32 && presentation_type < 128) | 
 | 17 |         PyErr_Format(PyExc_ValueError, | 
 | 18 |                      "Unknown format code '%c' " | 
 | 19 |                      "for object of type '%.200s'", | 
 | 20 |                      (char)presentation_type, | 
 | 21 |                      type_name); | 
 | 22 |     else | 
 | 23 |         PyErr_Format(PyExc_ValueError, | 
 | 24 |                      "Unknown format code '\\x%x' " | 
 | 25 |                      "for object of type '%.200s'", | 
 | 26 |                      (unsigned int)presentation_type, | 
 | 27 |                      type_name); | 
 | 28 | } | 
 | 29 |  | 
 | 30 | static void | 
 | 31 | invalid_comma_type(Py_UCS4 presentation_type) | 
 | 32 | { | 
 | 33 |     if (presentation_type > 32 && presentation_type < 128) | 
 | 34 |         PyErr_Format(PyExc_ValueError, | 
| Eric V. Smith | 89e1b1a | 2016-09-09 23:06:47 -0400 | [diff] [blame] | 35 |                      "Cannot specify ',' or '_' with '%c'.", | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 36 |                      (char)presentation_type); | 
 | 37 |     else | 
 | 38 |         PyErr_Format(PyExc_ValueError, | 
| Eric V. Smith | 89e1b1a | 2016-09-09 23:06:47 -0400 | [diff] [blame] | 39 |                      "Cannot specify ',' or '_' with '\\x%x'.", | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 40 |                      (unsigned int)presentation_type); | 
 | 41 | } | 
 | 42 |  | 
| Eric V. Smith | 89e1b1a | 2016-09-09 23:06:47 -0400 | [diff] [blame] | 43 | static void | 
| Benjamin Peterson | eb0dfa9 | 2016-09-09 20:14:05 -0700 | [diff] [blame^] | 44 | invalid_comma_and_underscore(void) | 
| Eric V. Smith | 89e1b1a | 2016-09-09 23:06:47 -0400 | [diff] [blame] | 45 | { | 
 | 46 |     PyErr_Format(PyExc_ValueError, "Cannot specify both ',' and '_'."); | 
 | 47 | } | 
 | 48 |  | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 49 | /* | 
 | 50 |     get_integer consumes 0 or more decimal digit characters from an | 
 | 51 |     input string, updates *result with the corresponding positive | 
 | 52 |     integer, and returns the number of digits consumed. | 
 | 53 |  | 
 | 54 |     returns -1 on error. | 
 | 55 | */ | 
 | 56 | static int | 
| Serhiy Storchaka | 1f93261 | 2016-08-29 15:57:26 +0300 | [diff] [blame] | 57 | get_integer(PyObject *str, Py_ssize_t *ppos, Py_ssize_t end, | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 58 |                   Py_ssize_t *result) | 
 | 59 | { | 
| Serhiy Storchaka | 1f93261 | 2016-08-29 15:57:26 +0300 | [diff] [blame] | 60 |     Py_ssize_t accumulator, digitval, pos = *ppos; | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 61 |     int numdigits; | 
| Serhiy Storchaka | 1f93261 | 2016-08-29 15:57:26 +0300 | [diff] [blame] | 62 |     int kind = PyUnicode_KIND(str); | 
 | 63 |     void *data = PyUnicode_DATA(str); | 
 | 64 |  | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 65 |     accumulator = numdigits = 0; | 
| Serhiy Storchaka | 1f93261 | 2016-08-29 15:57:26 +0300 | [diff] [blame] | 66 |     for (; pos < end; pos++, numdigits++) { | 
 | 67 |         digitval = Py_UNICODE_TODECIMAL(PyUnicode_READ(kind, data, pos)); | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 68 |         if (digitval < 0) | 
 | 69 |             break; | 
 | 70 |         /* | 
| Mark Dickinson | 47862d4 | 2011-12-01 15:27:04 +0000 | [diff] [blame] | 71 |            Detect possible overflow before it happens: | 
 | 72 |  | 
 | 73 |               accumulator * 10 + digitval > PY_SSIZE_T_MAX if and only if | 
 | 74 |               accumulator > (PY_SSIZE_T_MAX - digitval) / 10. | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 75 |         */ | 
| Mark Dickinson | 47862d4 | 2011-12-01 15:27:04 +0000 | [diff] [blame] | 76 |         if (accumulator > (PY_SSIZE_T_MAX - digitval) / 10) { | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 77 |             PyErr_Format(PyExc_ValueError, | 
 | 78 |                          "Too many decimal digits in format string"); | 
| Serhiy Storchaka | 1f93261 | 2016-08-29 15:57:26 +0300 | [diff] [blame] | 79 |             *ppos = pos; | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 80 |             return -1; | 
 | 81 |         } | 
| Mark Dickinson | 47862d4 | 2011-12-01 15:27:04 +0000 | [diff] [blame] | 82 |         accumulator = accumulator * 10 + digitval; | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 83 |     } | 
| Serhiy Storchaka | 1f93261 | 2016-08-29 15:57:26 +0300 | [diff] [blame] | 84 |     *ppos = pos; | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 85 |     *result = accumulator; | 
 | 86 |     return numdigits; | 
 | 87 | } | 
 | 88 |  | 
 | 89 | /************************************************************************/ | 
 | 90 | /*********** standard format specifier parsing **************************/ | 
 | 91 | /************************************************************************/ | 
 | 92 |  | 
 | 93 | /* returns true if this character is a specifier alignment token */ | 
 | 94 | Py_LOCAL_INLINE(int) | 
 | 95 | is_alignment_token(Py_UCS4 c) | 
 | 96 | { | 
 | 97 |     switch (c) { | 
 | 98 |     case '<': case '>': case '=': case '^': | 
 | 99 |         return 1; | 
 | 100 |     default: | 
 | 101 |         return 0; | 
 | 102 |     } | 
 | 103 | } | 
 | 104 |  | 
 | 105 | /* returns true if this character is a sign element */ | 
 | 106 | Py_LOCAL_INLINE(int) | 
 | 107 | is_sign_element(Py_UCS4 c) | 
 | 108 | { | 
 | 109 |     switch (c) { | 
 | 110 |     case ' ': case '+': case '-': | 
 | 111 |         return 1; | 
 | 112 |     default: | 
 | 113 |         return 0; | 
 | 114 |     } | 
 | 115 | } | 
| Eric Smith | 8c66326 | 2007-08-25 02:26:07 +0000 | [diff] [blame] | 116 |  | 
/* Locale type codes, stored in InternalFormatSpec.thousands_separators.
   LT_NO_LOCALE must be zero, because the field is tested for truth. */
/* No thousands separator was requested. */
#define LT_NO_LOCALE 0
/* A ',' was given in the format specifier. */
#define LT_DEFAULT_LOCALE 1
/* A '_' was given in the format specifier. */
#define LT_UNDERSCORE_LOCALE 2
/* A '_' was given together with type b, o, x or X: group every four
   digits instead of every three. */
#define LT_UNDER_FOUR_LOCALE 3
/* NOTE(review): presumably selects the current C locale's separators;
   not set anywhere in the parser -- confirm against the callers. */
#define LT_CURRENT_LOCALE 4
| Eric Smith | 4a7d76d | 2008-05-30 18:10:19 +0000 | [diff] [blame] | 123 |  | 
/* Parsed form of a standard format specifier, as filled in by
   parse_internal_render_format_spec(). */
typedef struct {
    /* Padding character; ' ' unless the specifier gives one (or gives
       the '0' shorthand). */
    Py_UCS4 fill_char;
    /* Alignment token from the specifier, else the caller-supplied
       default. */
    Py_UCS4 align;
    /* 1 if '#' was given, else 0. */
    int alternate;
    /* ' ', '+' or '-' if given, else '\0'. */
    Py_UCS4 sign;
    /* Field width; -1 if not given. */
    Py_ssize_t width;
    /* One of the LT_* locale type codes; 0 means no grouping. */
    int thousands_separators;
    /* Precision after the '.'; -1 if not given. */
    Py_ssize_t precision;
    /* Presentation type character, else the caller-supplied default. */
    Py_UCS4 type;
} InternalFormatSpec;
| Eric Smith | 4a7d76d | 2008-05-30 18:10:19 +0000 | [diff] [blame] | 134 |  | 
#if 0
/* Occasionally useful for debugging.  Compiled out by default; change
   the "#if 0" above to "#if 1" to enable it.

   Prints every field of *format to stdout, one per line.  Py_UCS4
   fields are cast explicitly so that each argument has exactly the
   type its printf conversion expects. */
static void
DEBUG_PRINT_FORMAT_SPEC(InternalFormatSpec *format)
{
    printf("internal format spec: fill_char %u\n",
           (unsigned int)format->fill_char);
    printf("internal format spec: align %u\n",
           (unsigned int)format->align);
    printf("internal format spec: alternate %d\n", format->alternate);
    printf("internal format spec: sign %u\n",
           (unsigned int)format->sign);
    printf("internal format spec: width %zd\n", format->width);
    printf("internal format spec: thousands_separators %d\n",
           format->thousands_separators);
    printf("internal format spec: precision %zd\n", format->precision);
    /* %c takes an int; only the low byte of the code point is shown. */
    printf("internal format spec: type %c\n", (int)format->type);
    printf("\n");
}
#endif
 | 152 |  | 
 | 153 |  | 
 | 154 | /* | 
 | 155 |   ptr points to the start of the format_spec, end points just past its end. | 
 | 156 |   fills in format with the parsed information. | 
 | 157 |   returns 1 on success, 0 on failure. | 
 | 158 |   if failure, sets the exception | 
 | 159 | */ | 
 | 160 | static int | 
 | 161 | parse_internal_render_format_spec(PyObject *format_spec, | 
 | 162 |                                   Py_ssize_t start, Py_ssize_t end, | 
 | 163 |                                   InternalFormatSpec *format, | 
 | 164 |                                   char default_type, | 
 | 165 |                                   char default_align) | 
 | 166 | { | 
 | 167 |     Py_ssize_t pos = start; | 
| Serhiy Storchaka | 1f93261 | 2016-08-29 15:57:26 +0300 | [diff] [blame] | 168 |     int kind = PyUnicode_KIND(format_spec); | 
 | 169 |     void *data = PyUnicode_DATA(format_spec); | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 170 |     /* end-pos is used throughout this code to specify the length of | 
 | 171 |        the input string */ | 
| Serhiy Storchaka | 1f93261 | 2016-08-29 15:57:26 +0300 | [diff] [blame] | 172 | #define READ_spec(index) PyUnicode_READ(kind, data, index) | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 173 |  | 
 | 174 |     Py_ssize_t consumed; | 
 | 175 |     int align_specified = 0; | 
| Eric V. Smith | 2ea9712 | 2014-04-14 11:55:10 -0400 | [diff] [blame] | 176 |     int fill_char_specified = 0; | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 177 |  | 
| Eric V. Smith | 2ea9712 | 2014-04-14 11:55:10 -0400 | [diff] [blame] | 178 |     format->fill_char = ' '; | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 179 |     format->align = default_align; | 
 | 180 |     format->alternate = 0; | 
 | 181 |     format->sign = '\0'; | 
 | 182 |     format->width = -1; | 
 | 183 |     format->thousands_separators = 0; | 
 | 184 |     format->precision = -1; | 
 | 185 |     format->type = default_type; | 
 | 186 |  | 
 | 187 |     /* If the second char is an alignment token, | 
 | 188 |        then parse the fill char */ | 
 | 189 |     if (end-pos >= 2 && is_alignment_token(READ_spec(pos+1))) { | 
 | 190 |         format->align = READ_spec(pos+1); | 
 | 191 |         format->fill_char = READ_spec(pos); | 
| Eric V. Smith | 2ea9712 | 2014-04-14 11:55:10 -0400 | [diff] [blame] | 192 |         fill_char_specified = 1; | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 193 |         align_specified = 1; | 
 | 194 |         pos += 2; | 
 | 195 |     } | 
 | 196 |     else if (end-pos >= 1 && is_alignment_token(READ_spec(pos))) { | 
 | 197 |         format->align = READ_spec(pos); | 
 | 198 |         align_specified = 1; | 
 | 199 |         ++pos; | 
 | 200 |     } | 
 | 201 |  | 
 | 202 |     /* Parse the various sign options */ | 
 | 203 |     if (end-pos >= 1 && is_sign_element(READ_spec(pos))) { | 
 | 204 |         format->sign = READ_spec(pos); | 
 | 205 |         ++pos; | 
 | 206 |     } | 
 | 207 |  | 
 | 208 |     /* If the next character is #, we're in alternate mode.  This only | 
 | 209 |        applies to integers. */ | 
 | 210 |     if (end-pos >= 1 && READ_spec(pos) == '#') { | 
 | 211 |         format->alternate = 1; | 
 | 212 |         ++pos; | 
 | 213 |     } | 
 | 214 |  | 
 | 215 |     /* The special case for 0-padding (backwards compat) */ | 
| Eric V. Smith | 2ea9712 | 2014-04-14 11:55:10 -0400 | [diff] [blame] | 216 |     if (!fill_char_specified && end-pos >= 1 && READ_spec(pos) == '0') { | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 217 |         format->fill_char = '0'; | 
 | 218 |         if (!align_specified) { | 
 | 219 |             format->align = '='; | 
 | 220 |         } | 
 | 221 |         ++pos; | 
 | 222 |     } | 
 | 223 |  | 
 | 224 |     consumed = get_integer(format_spec, &pos, end, &format->width); | 
 | 225 |     if (consumed == -1) | 
 | 226 |         /* Overflow error. Exception already set. */ | 
 | 227 |         return 0; | 
 | 228 |  | 
 | 229 |     /* If consumed is 0, we didn't consume any characters for the | 
 | 230 |        width. In that case, reset the width to -1, because | 
 | 231 |        get_integer() will have set it to zero. -1 is how we record | 
 | 232 |        that the width wasn't specified. */ | 
 | 233 |     if (consumed == 0) | 
 | 234 |         format->width = -1; | 
 | 235 |  | 
 | 236 |     /* Comma signifies add thousands separators */ | 
 | 237 |     if (end-pos && READ_spec(pos) == ',') { | 
| Eric V. Smith | 89e1b1a | 2016-09-09 23:06:47 -0400 | [diff] [blame] | 238 |         format->thousands_separators = LT_DEFAULT_LOCALE; | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 239 |         ++pos; | 
 | 240 |     } | 
| Eric V. Smith | 89e1b1a | 2016-09-09 23:06:47 -0400 | [diff] [blame] | 241 |     /* Underscore signifies add thousands separators */ | 
 | 242 |     if (end-pos && READ_spec(pos) == '_') { | 
 | 243 |         if (format->thousands_separators != 0) { | 
 | 244 |             invalid_comma_and_underscore(); | 
 | 245 |             return 0; | 
 | 246 |         } | 
 | 247 |         format->thousands_separators = LT_UNDERSCORE_LOCALE; | 
 | 248 |         ++pos; | 
 | 249 |     } | 
 | 250 |     if (end-pos && READ_spec(pos) == ',') { | 
 | 251 |         invalid_comma_and_underscore(); | 
 | 252 |         return 0; | 
 | 253 |     } | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 254 |  | 
 | 255 |     /* Parse field precision */ | 
 | 256 |     if (end-pos && READ_spec(pos) == '.') { | 
 | 257 |         ++pos; | 
 | 258 |  | 
 | 259 |         consumed = get_integer(format_spec, &pos, end, &format->precision); | 
 | 260 |         if (consumed == -1) | 
 | 261 |             /* Overflow error. Exception already set. */ | 
 | 262 |             return 0; | 
 | 263 |  | 
 | 264 |         /* Not having a precision after a dot is an error. */ | 
 | 265 |         if (consumed == 0) { | 
 | 266 |             PyErr_Format(PyExc_ValueError, | 
 | 267 |                          "Format specifier missing precision"); | 
 | 268 |             return 0; | 
 | 269 |         } | 
 | 270 |  | 
 | 271 |     } | 
 | 272 |  | 
 | 273 |     /* Finally, parse the type field. */ | 
 | 274 |  | 
 | 275 |     if (end-pos > 1) { | 
| Eric V. Smith | d25cfe6 | 2012-01-19 20:04:28 -0500 | [diff] [blame] | 276 |         /* More than one char remain, invalid format specifier. */ | 
 | 277 |         PyErr_Format(PyExc_ValueError, "Invalid format specifier"); | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 278 |         return 0; | 
 | 279 |     } | 
 | 280 |  | 
 | 281 |     if (end-pos == 1) { | 
 | 282 |         format->type = READ_spec(pos); | 
 | 283 |         ++pos; | 
 | 284 |     } | 
 | 285 |  | 
 | 286 |     /* Do as much validating as we can, just by looking at the format | 
 | 287 |        specifier.  Do not take into account what type of formatting | 
 | 288 |        we're doing (int, float, string). */ | 
 | 289 |  | 
 | 290 |     if (format->thousands_separators) { | 
 | 291 |         switch (format->type) { | 
 | 292 |         case 'd': | 
 | 293 |         case 'e': | 
 | 294 |         case 'f': | 
 | 295 |         case 'g': | 
 | 296 |         case 'E': | 
 | 297 |         case 'G': | 
 | 298 |         case '%': | 
 | 299 |         case 'F': | 
 | 300 |         case '\0': | 
 | 301 |             /* These are allowed. See PEP 378.*/ | 
 | 302 |             break; | 
| Eric V. Smith | 89e1b1a | 2016-09-09 23:06:47 -0400 | [diff] [blame] | 303 |         case 'b': | 
 | 304 |         case 'o': | 
 | 305 |         case 'x': | 
 | 306 |         case 'X': | 
 | 307 |             /* Underscores are allowed in bin/oct/hex. See PEP 515. */ | 
 | 308 |             if (format->thousands_separators == LT_UNDERSCORE_LOCALE) { | 
 | 309 |                 /* Every four digits, not every three, in bin/oct/hex. */ | 
 | 310 |                 format->thousands_separators = LT_UNDER_FOUR_LOCALE; | 
 | 311 |                 break; | 
 | 312 |             } | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 313 |         default: | 
 | 314 |             invalid_comma_type(format->type); | 
 | 315 |             return 0; | 
 | 316 |         } | 
 | 317 |     } | 
 | 318 |  | 
| Victor Stinner | a4ac600 | 2012-01-21 15:50:49 +0100 | [diff] [blame] | 319 |     assert (format->align <= 127); | 
 | 320 |     assert (format->sign <= 127); | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 321 |     return 1; | 
 | 322 | } | 
 | 323 |  | 
 | 324 | /* Calculate the padding needed. */ | 
 | 325 | static void | 
 | 326 | calc_padding(Py_ssize_t nchars, Py_ssize_t width, Py_UCS4 align, | 
 | 327 |              Py_ssize_t *n_lpadding, Py_ssize_t *n_rpadding, | 
 | 328 |              Py_ssize_t *n_total) | 
 | 329 | { | 
 | 330 |     if (width >= 0) { | 
 | 331 |         if (nchars > width) | 
 | 332 |             *n_total = nchars; | 
 | 333 |         else | 
 | 334 |             *n_total = width; | 
 | 335 |     } | 
 | 336 |     else { | 
 | 337 |         /* not specified, use all of the chars and no more */ | 
 | 338 |         *n_total = nchars; | 
 | 339 |     } | 
 | 340 |  | 
 | 341 |     /* Figure out how much leading space we need, based on the | 
 | 342 |        aligning */ | 
 | 343 |     if (align == '>') | 
 | 344 |         *n_lpadding = *n_total - nchars; | 
 | 345 |     else if (align == '^') | 
 | 346 |         *n_lpadding = (*n_total - nchars) / 2; | 
 | 347 |     else if (align == '<' || align == '=') | 
 | 348 |         *n_lpadding = 0; | 
 | 349 |     else { | 
 | 350 |         /* We should never have an unspecified alignment. */ | 
 | 351 |         *n_lpadding = 0; | 
 | 352 |         assert(0); | 
 | 353 |     } | 
 | 354 |  | 
 | 355 |     *n_rpadding = *n_total - nchars - *n_lpadding; | 
 | 356 | } | 
 | 357 |  | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 358 | /* Do the padding, and return a pointer to where the caller-supplied | 
 | 359 |    content goes. */ | 
| Victor Stinner | 9ce59bb | 2013-05-17 00:04:56 +0200 | [diff] [blame] | 360 | static int | 
| Victor Stinner | d3f0882 | 2012-05-29 12:57:52 +0200 | [diff] [blame] | 361 | fill_padding(_PyUnicodeWriter *writer, | 
 | 362 |              Py_ssize_t nchars, | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 363 |              Py_UCS4 fill_char, Py_ssize_t n_lpadding, | 
 | 364 |              Py_ssize_t n_rpadding) | 
 | 365 | { | 
| Victor Stinner | d3f0882 | 2012-05-29 12:57:52 +0200 | [diff] [blame] | 366 |     Py_ssize_t pos; | 
 | 367 |  | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 368 |     /* Pad on left. */ | 
| Victor Stinner | d3f0882 | 2012-05-29 12:57:52 +0200 | [diff] [blame] | 369 |     if (n_lpadding) { | 
 | 370 |         pos = writer->pos; | 
 | 371 |         _PyUnicode_FastFill(writer->buffer, pos, n_lpadding, fill_char); | 
 | 372 |     } | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 373 |  | 
 | 374 |     /* Pad on right. */ | 
| Victor Stinner | d3f0882 | 2012-05-29 12:57:52 +0200 | [diff] [blame] | 375 |     if (n_rpadding) { | 
 | 376 |         pos = writer->pos + nchars + n_lpadding; | 
 | 377 |         _PyUnicode_FastFill(writer->buffer, pos, n_rpadding, fill_char); | 
 | 378 |     } | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 379 |  | 
 | 380 |     /* Pointer to the user content. */ | 
| Victor Stinner | d3f0882 | 2012-05-29 12:57:52 +0200 | [diff] [blame] | 381 |     writer->pos += n_lpadding; | 
 | 382 |     return 0; | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 383 | } | 
 | 384 |  | 
 | 385 | /************************************************************************/ | 
 | 386 | /*********** common routines for numeric formatting *********************/ | 
 | 387 | /************************************************************************/ | 
 | 388 |  | 
/* Locale info needed for formatting integers and the part of floats
   before and including the decimal. Note that locales only support
   8-bit chars, not unicode. */
typedef struct {
    /* Decimal point, as a unicode object; calc_number_widths() uses its
       length and maximum character value. */
    PyObject *decimal_point;
    /* Thousands separator; handed to
       _PyUnicode_InsertThousandsGrouping() by calc_number_widths(). */
    PyObject *thousands_sep;
    /* Grouping description; handed to
       _PyUnicode_InsertThousandsGrouping() by calc_number_widths().
       NOTE(review): presumably in localeconv() "grouping" format --
       confirm. */
    const char *grouping;
} LocaleInfo;

/* Initializer that sets all three LocaleInfo fields to zero/NULL. */
#define STATIC_LOCALE_INFO_INIT {0, 0, 0}
 | 399 |  | 
/* describes the layout for an integer, see the comment in
   calc_number_widths() for details.  The output is laid out as:
   <lpadding> <sign> <prefix> <spadding> <grouped_digits> <decimal>
   <remainder> <rpadding> */
typedef struct {
    Py_ssize_t n_lpadding;  /* fill chars before the sign ('>' and '^') */
    Py_ssize_t n_prefix;    /* length of the prefix, as given by the
                               caller */
    Py_ssize_t n_spadding;  /* fill chars between the prefix and the
                               digits ('=' alignment) */
    Py_ssize_t n_rpadding;  /* fill chars after the remainder ('<' and
                               '^') */
    char sign;              /* '+', '-' or ' '; '\0' if no sign is
                               written */
    Py_ssize_t n_sign;      /* number of digits needed for sign (0/1) */
    Py_ssize_t n_grouped_digits; /* Space taken up by the digits, including
                                    any grouping chars. */
    Py_ssize_t n_decimal;   /* 0 if only an integer */
    Py_ssize_t n_remainder; /* Digits in decimal and/or exponent part,
                               excluding the decimal itself, if
                               present. */

    /* These 2 are not the widths of fields, but are needed by
       STRINGLIB_GROUPING. */
    Py_ssize_t n_digits;    /* The number of digits before a decimal
                               or exponent. */
    Py_ssize_t n_min_width; /* The min_width we used when we computed
                               the n_grouped_digits width. */
} NumberFieldWidths;
 | 423 |  | 
 | 424 |  | 
 | 425 | /* Given a number of the form: | 
 | 426 |    digits[remainder] | 
 | 427 |    where ptr points to the start and end points to the end, find where | 
 | 428 |     the integer part ends. This could be a decimal, an exponent, both, | 
 | 429 |     or neither. | 
 | 430 |    If a decimal point is present, set *has_decimal and increment | 
 | 431 |     remainder beyond it. | 
 | 432 |    Results are undefined (but shouldn't crash) for improperly | 
 | 433 |     formatted strings. | 
 | 434 | */ | 
 | 435 | static void | 
 | 436 | parse_number(PyObject *s, Py_ssize_t pos, Py_ssize_t end, | 
 | 437 |              Py_ssize_t *n_remainder, int *has_decimal) | 
 | 438 | { | 
 | 439 |     Py_ssize_t remainder; | 
| Serhiy Storchaka | 1f93261 | 2016-08-29 15:57:26 +0300 | [diff] [blame] | 440 |     int kind = PyUnicode_KIND(s); | 
 | 441 |     void *data = PyUnicode_DATA(s); | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 442 |  | 
| Serhiy Storchaka | 1f93261 | 2016-08-29 15:57:26 +0300 | [diff] [blame] | 443 |     while (pos<end && Py_ISDIGIT(PyUnicode_READ(kind, data, pos))) | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 444 |         ++pos; | 
 | 445 |     remainder = pos; | 
 | 446 |  | 
 | 447 |     /* Does remainder start with a decimal point? */ | 
| Serhiy Storchaka | 1f93261 | 2016-08-29 15:57:26 +0300 | [diff] [blame] | 448 |     *has_decimal = pos<end && PyUnicode_READ(kind, data, remainder) == '.'; | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 449 |  | 
 | 450 |     /* Skip the decimal point. */ | 
 | 451 |     if (*has_decimal) | 
 | 452 |         remainder++; | 
 | 453 |  | 
 | 454 |     *n_remainder = end - remainder; | 
 | 455 | } | 
 | 456 |  | 
 | 457 | /* not all fields of format are used.  for example, precision is | 
 | 458 |    unused.  should this take discrete params in order to be more clear | 
 | 459 |    about what it does?  or is passing a single format parameter easier | 
 | 460 |    and more efficient enough to justify a little obfuscation? */ | 
 | 461 | static Py_ssize_t | 
 | 462 | calc_number_widths(NumberFieldWidths *spec, Py_ssize_t n_prefix, | 
 | 463 |                    Py_UCS4 sign_char, PyObject *number, Py_ssize_t n_start, | 
 | 464 |                    Py_ssize_t n_end, Py_ssize_t n_remainder, | 
 | 465 |                    int has_decimal, const LocaleInfo *locale, | 
| Victor Stinner | 41a863c | 2012-02-24 00:37:51 +0100 | [diff] [blame] | 466 |                    const InternalFormatSpec *format, Py_UCS4 *maxchar) | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 467 | { | 
 | 468 |     Py_ssize_t n_non_digit_non_padding; | 
 | 469 |     Py_ssize_t n_padding; | 
 | 470 |  | 
 | 471 |     spec->n_digits = n_end - n_start - n_remainder - (has_decimal?1:0); | 
 | 472 |     spec->n_lpadding = 0; | 
 | 473 |     spec->n_prefix = n_prefix; | 
| Victor Stinner | 41a863c | 2012-02-24 00:37:51 +0100 | [diff] [blame] | 474 |     spec->n_decimal = has_decimal ? PyUnicode_GET_LENGTH(locale->decimal_point) : 0; | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 475 |     spec->n_remainder = n_remainder; | 
 | 476 |     spec->n_spadding = 0; | 
 | 477 |     spec->n_rpadding = 0; | 
 | 478 |     spec->sign = '\0'; | 
 | 479 |     spec->n_sign = 0; | 
 | 480 |  | 
 | 481 |     /* the output will look like: | 
 | 482 |        |                                                                                         | | 
 | 483 |        | <lpadding> <sign> <prefix> <spadding> <grouped_digits> <decimal> <remainder> <rpadding> | | 
 | 484 |        |                                                                                         | | 
 | 485 |  | 
 | 486 |        sign is computed from format->sign and the actual | 
 | 487 |        sign of the number | 
 | 488 |  | 
 | 489 |        prefix is given (it's for the '0x' prefix) | 
 | 490 |  | 
 | 491 |        digits is already known | 
 | 492 |  | 
 | 493 |        the total width is either given, or computed from the | 
 | 494 |        actual digits | 
 | 495 |  | 
 | 496 |        only one of lpadding, spadding, and rpadding can be non-zero, | 
 | 497 |        and it's calculated from the width and other fields | 
 | 498 |     */ | 
 | 499 |  | 
 | 500 |     /* compute the various parts we're going to write */ | 
 | 501 |     switch (format->sign) { | 
 | 502 |     case '+': | 
 | 503 |         /* always put a + or - */ | 
 | 504 |         spec->n_sign = 1; | 
 | 505 |         spec->sign = (sign_char == '-' ? '-' : '+'); | 
 | 506 |         break; | 
 | 507 |     case ' ': | 
 | 508 |         spec->n_sign = 1; | 
 | 509 |         spec->sign = (sign_char == '-' ? '-' : ' '); | 
 | 510 |         break; | 
 | 511 |     default: | 
 | 512 |         /* Not specified, or the default (-) */ | 
 | 513 |         if (sign_char == '-') { | 
 | 514 |             spec->n_sign = 1; | 
 | 515 |             spec->sign = '-'; | 
 | 516 |         } | 
 | 517 |     } | 
 | 518 |  | 
 | 519 |     /* The number of chars used for non-digits and non-padding. */ | 
 | 520 |     n_non_digit_non_padding = spec->n_sign + spec->n_prefix + spec->n_decimal + | 
 | 521 |         spec->n_remainder; | 
 | 522 |  | 
 | 523 |     /* min_width can go negative, that's okay. format->width == -1 means | 
 | 524 |        we don't care. */ | 
 | 525 |     if (format->fill_char == '0' && format->align == '=') | 
 | 526 |         spec->n_min_width = format->width - n_non_digit_non_padding; | 
 | 527 |     else | 
 | 528 |         spec->n_min_width = 0; | 
 | 529 |  | 
 | 530 |     if (spec->n_digits == 0) | 
 | 531 |         /* This case only occurs when using 'c' formatting, we need | 
 | 532 |            to special case it because the grouping code always wants | 
 | 533 |            to have at least one character. */ | 
 | 534 |         spec->n_grouped_digits = 0; | 
| Victor Stinner | 41a863c | 2012-02-24 00:37:51 +0100 | [diff] [blame] | 535 |     else { | 
 | 536 |         Py_UCS4 grouping_maxchar; | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 537 |         spec->n_grouped_digits = _PyUnicode_InsertThousandsGrouping( | 
| Victor Stinner | 41a863c | 2012-02-24 00:37:51 +0100 | [diff] [blame] | 538 |             NULL, 0, | 
 | 539 |             0, NULL, | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 540 |             spec->n_digits, spec->n_min_width, | 
| Victor Stinner | 41a863c | 2012-02-24 00:37:51 +0100 | [diff] [blame] | 541 |             locale->grouping, locale->thousands_sep, &grouping_maxchar); | 
 | 542 |         *maxchar = Py_MAX(*maxchar, grouping_maxchar); | 
 | 543 |     } | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 544 |  | 
 | 545 |     /* Given the desired width and the total of digit and non-digit | 
 | 546 |        space we consume, see if we need any padding. format->width can | 
 | 547 |        be negative (meaning no padding), but this code still works in | 
 | 548 |        that case. */ | 
 | 549 |     n_padding = format->width - | 
 | 550 |                         (n_non_digit_non_padding + spec->n_grouped_digits); | 
 | 551 |     if (n_padding > 0) { | 
 | 552 |         /* Some padding is needed. Determine if it's left, space, or right. */ | 
 | 553 |         switch (format->align) { | 
 | 554 |         case '<': | 
 | 555 |             spec->n_rpadding = n_padding; | 
 | 556 |             break; | 
 | 557 |         case '^': | 
 | 558 |             spec->n_lpadding = n_padding / 2; | 
 | 559 |             spec->n_rpadding = n_padding - spec->n_lpadding; | 
 | 560 |             break; | 
 | 561 |         case '=': | 
 | 562 |             spec->n_spadding = n_padding; | 
 | 563 |             break; | 
 | 564 |         case '>': | 
 | 565 |             spec->n_lpadding = n_padding; | 
 | 566 |             break; | 
 | 567 |         default: | 
 | 568 |             /* Shouldn't get here, but treat it as '>' */ | 
 | 569 |             spec->n_lpadding = n_padding; | 
 | 570 |             assert(0); | 
 | 571 |             break; | 
 | 572 |         } | 
 | 573 |     } | 
| Victor Stinner | 41a863c | 2012-02-24 00:37:51 +0100 | [diff] [blame] | 574 |  | 
 | 575 |     if (spec->n_lpadding || spec->n_spadding || spec->n_rpadding) | 
 | 576 |         *maxchar = Py_MAX(*maxchar, format->fill_char); | 
 | 577 |  | 
| Victor Stinner | 90f50d4 | 2012-02-24 01:44:47 +0100 | [diff] [blame] | 578 |     if (spec->n_decimal) | 
 | 579 |         *maxchar = Py_MAX(*maxchar, PyUnicode_MAX_CHAR_VALUE(locale->decimal_point)); | 
 | 580 |  | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 581 |     return spec->n_lpadding + spec->n_sign + spec->n_prefix + | 
 | 582 |         spec->n_spadding + spec->n_grouped_digits + spec->n_decimal + | 
 | 583 |         spec->n_remainder + spec->n_rpadding; | 
 | 584 | } | 
 | 585 |  | 
 | 586 | /* Fill in the digit parts of a numbers's string representation, | 
 | 587 |    as determined in calc_number_widths(). | 
| Victor Stinner | afbaa20 | 2011-09-28 21:50:16 +0200 | [diff] [blame] | 588 |    Return -1 on error, or 0 on success. */ | 
 | 589 | static int | 
| Victor Stinner | d3f0882 | 2012-05-29 12:57:52 +0200 | [diff] [blame] | 590 | fill_number(_PyUnicodeWriter *writer, const NumberFieldWidths *spec, | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 591 |             PyObject *digits, Py_ssize_t d_start, Py_ssize_t d_end, | 
| Victor Stinner | afbaa20 | 2011-09-28 21:50:16 +0200 | [diff] [blame] | 592 |             PyObject *prefix, Py_ssize_t p_start, | 
 | 593 |             Py_UCS4 fill_char, | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 594 |             LocaleInfo *locale, int toupper) | 
 | 595 | { | 
 | 596 |     /* Used to keep track of digits, decimal, and remainder. */ | 
 | 597 |     Py_ssize_t d_pos = d_start; | 
| Victor Stinner | 22c103b | 2013-05-07 23:50:03 +0200 | [diff] [blame] | 598 |     const unsigned int kind = writer->kind; | 
| Victor Stinner | d3f0882 | 2012-05-29 12:57:52 +0200 | [diff] [blame] | 599 |     const void *data = writer->data; | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 600 |     Py_ssize_t r; | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 601 |  | 
 | 602 |     if (spec->n_lpadding) { | 
| Victor Stinner | d3f0882 | 2012-05-29 12:57:52 +0200 | [diff] [blame] | 603 |         _PyUnicode_FastFill(writer->buffer, | 
 | 604 |                             writer->pos, spec->n_lpadding, fill_char); | 
 | 605 |         writer->pos += spec->n_lpadding; | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 606 |     } | 
 | 607 |     if (spec->n_sign == 1) { | 
| Victor Stinner | d3f0882 | 2012-05-29 12:57:52 +0200 | [diff] [blame] | 608 |         PyUnicode_WRITE(kind, data, writer->pos, spec->sign); | 
 | 609 |         writer->pos++; | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 610 |     } | 
 | 611 |     if (spec->n_prefix) { | 
| Victor Stinner | d3f0882 | 2012-05-29 12:57:52 +0200 | [diff] [blame] | 612 |         _PyUnicode_FastCopyCharacters(writer->buffer, writer->pos, | 
 | 613 |                                       prefix, p_start, | 
 | 614 |                                       spec->n_prefix); | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 615 |         if (toupper) { | 
 | 616 |             Py_ssize_t t; | 
| Benjamin Peterson | 21e0da2 | 2012-01-11 21:00:42 -0500 | [diff] [blame] | 617 |             for (t = 0; t < spec->n_prefix; t++) { | 
| Victor Stinner | d3f0882 | 2012-05-29 12:57:52 +0200 | [diff] [blame] | 618 |                 Py_UCS4 c = PyUnicode_READ(kind, data, writer->pos + t); | 
| Victor Stinner | ed27785 | 2012-02-01 00:22:23 +0100 | [diff] [blame] | 619 |                 c = Py_TOUPPER(c); | 
| Victor Stinner | a4ac600 | 2012-01-21 15:50:49 +0100 | [diff] [blame] | 620 |                 assert (c <= 127); | 
| Victor Stinner | d3f0882 | 2012-05-29 12:57:52 +0200 | [diff] [blame] | 621 |                 PyUnicode_WRITE(kind, data, writer->pos + t, c); | 
| Benjamin Peterson | 21e0da2 | 2012-01-11 21:00:42 -0500 | [diff] [blame] | 622 |             } | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 623 |         } | 
| Victor Stinner | d3f0882 | 2012-05-29 12:57:52 +0200 | [diff] [blame] | 624 |         writer->pos += spec->n_prefix; | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 625 |     } | 
 | 626 |     if (spec->n_spadding) { | 
| Victor Stinner | d3f0882 | 2012-05-29 12:57:52 +0200 | [diff] [blame] | 627 |         _PyUnicode_FastFill(writer->buffer, | 
 | 628 |                             writer->pos, spec->n_spadding, fill_char); | 
 | 629 |         writer->pos += spec->n_spadding; | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 630 |     } | 
 | 631 |  | 
 | 632 |     /* Only for type 'c' special case, it has no digits. */ | 
 | 633 |     if (spec->n_digits != 0) { | 
 | 634 |         /* Fill the digits with InsertThousandsGrouping. */ | 
| Victor Stinner | dba2dee | 2011-09-28 21:50:42 +0200 | [diff] [blame] | 635 |         char *pdigits; | 
 | 636 |         if (PyUnicode_READY(digits)) | 
 | 637 |             return -1; | 
 | 638 |         pdigits = PyUnicode_DATA(digits); | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 639 |         if (PyUnicode_KIND(digits) < kind) { | 
 | 640 |             pdigits = _PyUnicode_AsKind(digits, kind); | 
| Victor Stinner | afbaa20 | 2011-09-28 21:50:16 +0200 | [diff] [blame] | 641 |             if (pdigits == NULL) | 
 | 642 |                 return -1; | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 643 |         } | 
| Victor Stinner | 90f50d4 | 2012-02-24 01:44:47 +0100 | [diff] [blame] | 644 |         r = _PyUnicode_InsertThousandsGrouping( | 
| Victor Stinner | d3f0882 | 2012-05-29 12:57:52 +0200 | [diff] [blame] | 645 |                 writer->buffer, writer->pos, | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 646 |                 spec->n_grouped_digits, | 
| Martin v. Löwis | c47adb0 | 2011-10-07 20:55:35 +0200 | [diff] [blame] | 647 |                 pdigits + kind * d_pos, | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 648 |                 spec->n_digits, spec->n_min_width, | 
| Victor Stinner | 41a863c | 2012-02-24 00:37:51 +0100 | [diff] [blame] | 649 |                 locale->grouping, locale->thousands_sep, NULL); | 
| Victor Stinner | 90f50d4 | 2012-02-24 01:44:47 +0100 | [diff] [blame] | 650 |         if (r == -1) | 
 | 651 |             return -1; | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 652 |         assert(r == spec->n_grouped_digits); | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 653 |         if (PyUnicode_KIND(digits) < kind) | 
 | 654 |             PyMem_Free(pdigits); | 
 | 655 |         d_pos += spec->n_digits; | 
 | 656 |     } | 
 | 657 |     if (toupper) { | 
 | 658 |         Py_ssize_t t; | 
| Benjamin Peterson | 21e0da2 | 2012-01-11 21:00:42 -0500 | [diff] [blame] | 659 |         for (t = 0; t < spec->n_grouped_digits; t++) { | 
| Victor Stinner | d3f0882 | 2012-05-29 12:57:52 +0200 | [diff] [blame] | 660 |             Py_UCS4 c = PyUnicode_READ(kind, data, writer->pos + t); | 
| Victor Stinner | ed27785 | 2012-02-01 00:22:23 +0100 | [diff] [blame] | 661 |             c = Py_TOUPPER(c); | 
| Benjamin Peterson | 21e0da2 | 2012-01-11 21:00:42 -0500 | [diff] [blame] | 662 |             if (c > 127) { | 
 | 663 |                 PyErr_SetString(PyExc_SystemError, "non-ascii grouped digit"); | 
 | 664 |                 return -1; | 
 | 665 |             } | 
| Victor Stinner | d3f0882 | 2012-05-29 12:57:52 +0200 | [diff] [blame] | 666 |             PyUnicode_WRITE(kind, data, writer->pos + t, c); | 
| Benjamin Peterson | 21e0da2 | 2012-01-11 21:00:42 -0500 | [diff] [blame] | 667 |         } | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 668 |     } | 
| Victor Stinner | d3f0882 | 2012-05-29 12:57:52 +0200 | [diff] [blame] | 669 |     writer->pos += spec->n_grouped_digits; | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 670 |  | 
 | 671 |     if (spec->n_decimal) { | 
| Victor Stinner | d3f0882 | 2012-05-29 12:57:52 +0200 | [diff] [blame] | 672 |         _PyUnicode_FastCopyCharacters( | 
 | 673 |             writer->buffer, writer->pos, | 
 | 674 |             locale->decimal_point, 0, spec->n_decimal); | 
 | 675 |         writer->pos += spec->n_decimal; | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 676 |         d_pos += 1; | 
 | 677 |     } | 
 | 678 |  | 
 | 679 |     if (spec->n_remainder) { | 
| Victor Stinner | d3f0882 | 2012-05-29 12:57:52 +0200 | [diff] [blame] | 680 |         _PyUnicode_FastCopyCharacters( | 
 | 681 |             writer->buffer, writer->pos, | 
 | 682 |             digits, d_pos, spec->n_remainder); | 
 | 683 |         writer->pos += spec->n_remainder; | 
| Brett Cannon | 8a250fa | 2012-06-25 16:13:44 -0400 | [diff] [blame] | 684 |         /* d_pos += spec->n_remainder; */ | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 685 |     } | 
 | 686 |  | 
 | 687 |     if (spec->n_rpadding) { | 
| Victor Stinner | d3f0882 | 2012-05-29 12:57:52 +0200 | [diff] [blame] | 688 |         _PyUnicode_FastFill(writer->buffer, | 
 | 689 |                             writer->pos, spec->n_rpadding, | 
 | 690 |                             fill_char); | 
 | 691 |         writer->pos += spec->n_rpadding; | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 692 |     } | 
| Victor Stinner | afbaa20 | 2011-09-28 21:50:16 +0200 | [diff] [blame] | 693 |     return 0; | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 694 | } | 
 | 695 |  | 
| Serhiy Storchaka | 2d06e84 | 2015-12-25 19:53:18 +0200 | [diff] [blame] | 696 | static const char no_grouping[1] = {CHAR_MAX}; | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 697 |  | 
 | 698 | /* Find the decimal point character(s?), thousands_separator(s?), and | 
 | 699 |    grouping description, either for the current locale if type is | 
| Eric V. Smith | 89e1b1a | 2016-09-09 23:06:47 -0400 | [diff] [blame] | 700 |    LT_CURRENT_LOCALE, a hard-coded locale if LT_DEFAULT_LOCALE or | 
 | 701 |    LT_UNDERSCORE_LOCALE/LT_UNDER_FOUR_LOCALE, or none if LT_NO_LOCALE. */ | 
| Victor Stinner | 41a863c | 2012-02-24 00:37:51 +0100 | [diff] [blame] | 702 | static int | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 703 | get_locale_info(int type, LocaleInfo *locale_info) | 
 | 704 | { | 
 | 705 |     switch (type) { | 
 | 706 |     case LT_CURRENT_LOCALE: { | 
 | 707 |         struct lconv *locale_data = localeconv(); | 
| Victor Stinner | 41a863c | 2012-02-24 00:37:51 +0100 | [diff] [blame] | 708 |         locale_info->decimal_point = PyUnicode_DecodeLocale( | 
 | 709 |                                          locale_data->decimal_point, | 
 | 710 |                                          NULL); | 
 | 711 |         if (locale_info->decimal_point == NULL) | 
 | 712 |             return -1; | 
 | 713 |         locale_info->thousands_sep = PyUnicode_DecodeLocale( | 
 | 714 |                                          locale_data->thousands_sep, | 
 | 715 |                                          NULL); | 
 | 716 |         if (locale_info->thousands_sep == NULL) { | 
 | 717 |             Py_DECREF(locale_info->decimal_point); | 
 | 718 |             return -1; | 
 | 719 |         } | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 720 |         locale_info->grouping = locale_data->grouping; | 
 | 721 |         break; | 
 | 722 |     } | 
 | 723 |     case LT_DEFAULT_LOCALE: | 
| Eric V. Smith | 89e1b1a | 2016-09-09 23:06:47 -0400 | [diff] [blame] | 724 |     case LT_UNDERSCORE_LOCALE: | 
 | 725 |     case LT_UNDER_FOUR_LOCALE: | 
| Victor Stinner | 41a863c | 2012-02-24 00:37:51 +0100 | [diff] [blame] | 726 |         locale_info->decimal_point = PyUnicode_FromOrdinal('.'); | 
| Eric V. Smith | 89e1b1a | 2016-09-09 23:06:47 -0400 | [diff] [blame] | 727 |         locale_info->thousands_sep = PyUnicode_FromOrdinal( | 
 | 728 |             type == LT_DEFAULT_LOCALE ? ',' : '_'); | 
| Victor Stinner | 41a863c | 2012-02-24 00:37:51 +0100 | [diff] [blame] | 729 |         if (!locale_info->decimal_point || !locale_info->thousands_sep) { | 
 | 730 |             Py_XDECREF(locale_info->decimal_point); | 
 | 731 |             Py_XDECREF(locale_info->thousands_sep); | 
 | 732 |             return -1; | 
 | 733 |         } | 
| Eric V. Smith | 89e1b1a | 2016-09-09 23:06:47 -0400 | [diff] [blame] | 734 |         if (type != LT_UNDER_FOUR_LOCALE) | 
 | 735 |             locale_info->grouping = "\3"; /* Group every 3 characters.  The | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 736 |                                          (implicit) trailing 0 means repeat | 
 | 737 |                                          infinitely. */ | 
| Eric V. Smith | 89e1b1a | 2016-09-09 23:06:47 -0400 | [diff] [blame] | 738 |         else | 
 | 739 |             locale_info->grouping = "\4"; /* Bin/oct/hex group every four. */ | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 740 |         break; | 
 | 741 |     case LT_NO_LOCALE: | 
| Victor Stinner | 41a863c | 2012-02-24 00:37:51 +0100 | [diff] [blame] | 742 |         locale_info->decimal_point = PyUnicode_FromOrdinal('.'); | 
 | 743 |         locale_info->thousands_sep = PyUnicode_New(0, 0); | 
 | 744 |         if (!locale_info->decimal_point || !locale_info->thousands_sep) { | 
 | 745 |             Py_XDECREF(locale_info->decimal_point); | 
 | 746 |             Py_XDECREF(locale_info->thousands_sep); | 
 | 747 |             return -1; | 
 | 748 |         } | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 749 |         locale_info->grouping = no_grouping; | 
 | 750 |         break; | 
 | 751 |     default: | 
 | 752 |         assert(0); | 
 | 753 |     } | 
| Victor Stinner | 41a863c | 2012-02-24 00:37:51 +0100 | [diff] [blame] | 754 |     return 0; | 
 | 755 | } | 
 | 756 |  | 
 | 757 | static void | 
 | 758 | free_locale_info(LocaleInfo *locale_info) | 
 | 759 | { | 
 | 760 |     Py_XDECREF(locale_info->decimal_point); | 
 | 761 |     Py_XDECREF(locale_info->thousands_sep); | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 762 | } | 
 | 763 |  | 
 | 764 | /************************************************************************/ | 
 | 765 | /*********** string formatting ******************************************/ | 
 | 766 | /************************************************************************/ | 
 | 767 |  | 
| Victor Stinner | d3f0882 | 2012-05-29 12:57:52 +0200 | [diff] [blame] | 768 | static int | 
 | 769 | format_string_internal(PyObject *value, const InternalFormatSpec *format, | 
 | 770 |                        _PyUnicodeWriter *writer) | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 771 | { | 
 | 772 |     Py_ssize_t lpad; | 
 | 773 |     Py_ssize_t rpad; | 
 | 774 |     Py_ssize_t total; | 
| Victor Stinner | d3f0882 | 2012-05-29 12:57:52 +0200 | [diff] [blame] | 775 |     Py_ssize_t len; | 
 | 776 |     int result = -1; | 
| Victor Stinner | ece58de | 2012-04-23 23:36:38 +0200 | [diff] [blame] | 777 |     Py_UCS4 maxchar; | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 778 |  | 
| Victor Stinner | d3f0882 | 2012-05-29 12:57:52 +0200 | [diff] [blame] | 779 |     assert(PyUnicode_IS_READY(value)); | 
 | 780 |     len = PyUnicode_GET_LENGTH(value); | 
 | 781 |  | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 782 |     /* sign is not allowed on strings */ | 
 | 783 |     if (format->sign != '\0') { | 
 | 784 |         PyErr_SetString(PyExc_ValueError, | 
 | 785 |                         "Sign not allowed in string format specifier"); | 
 | 786 |         goto done; | 
 | 787 |     } | 
 | 788 |  | 
 | 789 |     /* alternate is not allowed on strings */ | 
 | 790 |     if (format->alternate) { | 
 | 791 |         PyErr_SetString(PyExc_ValueError, | 
 | 792 |                         "Alternate form (#) not allowed in string format " | 
 | 793 |                         "specifier"); | 
 | 794 |         goto done; | 
 | 795 |     } | 
 | 796 |  | 
 | 797 |     /* '=' alignment not allowed on strings */ | 
 | 798 |     if (format->align == '=') { | 
 | 799 |         PyErr_SetString(PyExc_ValueError, | 
 | 800 |                         "'=' alignment not allowed " | 
 | 801 |                         "in string format specifier"); | 
 | 802 |         goto done; | 
 | 803 |     } | 
 | 804 |  | 
| Victor Stinner | 621ef3d | 2012-10-02 00:33:47 +0200 | [diff] [blame] | 805 |     if ((format->width == -1 || format->width <= len) | 
 | 806 |         && (format->precision == -1 || format->precision >= len)) { | 
| Victor Stinner | d3f0882 | 2012-05-29 12:57:52 +0200 | [diff] [blame] | 807 |         /* Fast path */ | 
 | 808 |         return _PyUnicodeWriter_WriteStr(writer, value); | 
 | 809 |     } | 
 | 810 |  | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 811 |     /* if precision is specified, output no more that format.precision | 
 | 812 |        characters */ | 
 | 813 |     if (format->precision >= 0 && len >= format->precision) { | 
 | 814 |         len = format->precision; | 
 | 815 |     } | 
 | 816 |  | 
 | 817 |     calc_padding(len, format->width, format->align, &lpad, &rpad, &total); | 
 | 818 |  | 
| Victor Stinner | eb4b5ac | 2013-04-03 02:02:33 +0200 | [diff] [blame] | 819 |     maxchar = writer->maxchar; | 
| Victor Stinner | a4ac600 | 2012-01-21 15:50:49 +0100 | [diff] [blame] | 820 |     if (lpad != 0 || rpad != 0) | 
 | 821 |         maxchar = Py_MAX(maxchar, format->fill_char); | 
| Victor Stinner | eb4b5ac | 2013-04-03 02:02:33 +0200 | [diff] [blame] | 822 |     if (PyUnicode_MAX_CHAR_VALUE(value) > maxchar) { | 
 | 823 |         Py_UCS4 valmaxchar = _PyUnicode_FindMaxChar(value, 0, len); | 
 | 824 |         maxchar = Py_MAX(maxchar, valmaxchar); | 
 | 825 |     } | 
| Victor Stinner | a4ac600 | 2012-01-21 15:50:49 +0100 | [diff] [blame] | 826 |  | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 827 |     /* allocate the resulting string */ | 
| Victor Stinner | d3f0882 | 2012-05-29 12:57:52 +0200 | [diff] [blame] | 828 |     if (_PyUnicodeWriter_Prepare(writer, total, maxchar) == -1) | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 829 |         goto done; | 
 | 830 |  | 
 | 831 |     /* Write into that space. First the padding. */ | 
| Eric V. Smith | 2ea9712 | 2014-04-14 11:55:10 -0400 | [diff] [blame] | 832 |     result = fill_padding(writer, len, format->fill_char, lpad, rpad); | 
| Victor Stinner | d3f0882 | 2012-05-29 12:57:52 +0200 | [diff] [blame] | 833 |     if (result == -1) | 
 | 834 |         goto done; | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 835 |  | 
 | 836 |     /* Then the source string. */ | 
| Victor Stinner | c9d369f | 2012-06-16 02:22:37 +0200 | [diff] [blame] | 837 |     if (len) { | 
 | 838 |         _PyUnicode_FastCopyCharacters(writer->buffer, writer->pos, | 
 | 839 |                                       value, 0, len); | 
 | 840 |     } | 
| Victor Stinner | d3f0882 | 2012-05-29 12:57:52 +0200 | [diff] [blame] | 841 |     writer->pos += (len + rpad); | 
 | 842 |     result = 0; | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 843 |  | 
 | 844 | done: | 
 | 845 |     return result; | 
 | 846 | } | 
 | 847 |  | 
 | 848 |  | 
 | 849 | /************************************************************************/ | 
 | 850 | /*********** long formatting ********************************************/ | 
 | 851 | /************************************************************************/ | 
 | 852 |  | 
| Victor Stinner | d3f0882 | 2012-05-29 12:57:52 +0200 | [diff] [blame] | 853 | static int | 
 | 854 | format_long_internal(PyObject *value, const InternalFormatSpec *format, | 
 | 855 |                      _PyUnicodeWriter *writer) | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 856 | { | 
| Victor Stinner | d3f0882 | 2012-05-29 12:57:52 +0200 | [diff] [blame] | 857 |     int result = -1; | 
| Amaury Forgeot d'Arc | cd27df3 | 2012-01-23 22:42:19 +0100 | [diff] [blame] | 858 |     Py_UCS4 maxchar = 127; | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 859 |     PyObject *tmp = NULL; | 
 | 860 |     Py_ssize_t inumeric_chars; | 
 | 861 |     Py_UCS4 sign_char = '\0'; | 
 | 862 |     Py_ssize_t n_digits;       /* count of digits need from the computed | 
 | 863 |                                   string */ | 
 | 864 |     Py_ssize_t n_remainder = 0; /* Used only for 'c' formatting, which | 
 | 865 |                                    produces non-digits */ | 
 | 866 |     Py_ssize_t n_prefix = 0;   /* Count of prefix chars, (e.g., '0x') */ | 
 | 867 |     Py_ssize_t n_total; | 
| Victor Stinner | ed27785 | 2012-02-01 00:22:23 +0100 | [diff] [blame] | 868 |     Py_ssize_t prefix = 0; | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 869 |     NumberFieldWidths spec; | 
 | 870 |     long x; | 
 | 871 |  | 
 | 872 |     /* Locale settings, either from the actual locale or | 
 | 873 |        from a hard-code pseudo-locale */ | 
| Victor Stinner | 41a863c | 2012-02-24 00:37:51 +0100 | [diff] [blame] | 874 |     LocaleInfo locale = STATIC_LOCALE_INFO_INIT; | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 875 |  | 
 | 876 |     /* no precision allowed on integers */ | 
 | 877 |     if (format->precision != -1) { | 
 | 878 |         PyErr_SetString(PyExc_ValueError, | 
 | 879 |                         "Precision not allowed in integer format specifier"); | 
 | 880 |         goto done; | 
 | 881 |     } | 
 | 882 |  | 
 | 883 |     /* special case for character formatting */ | 
 | 884 |     if (format->type == 'c') { | 
 | 885 |         /* error to specify a sign */ | 
 | 886 |         if (format->sign != '\0') { | 
 | 887 |             PyErr_SetString(PyExc_ValueError, | 
 | 888 |                             "Sign not allowed with integer" | 
 | 889 |                             " format specifier 'c'"); | 
 | 890 |             goto done; | 
 | 891 |         } | 
| Eric V. Smith | a12572f | 2014-04-15 22:37:55 -0400 | [diff] [blame] | 892 |         /* error to request alternate format */ | 
 | 893 |         if (format->alternate) { | 
 | 894 |             PyErr_SetString(PyExc_ValueError, | 
 | 895 |                             "Alternate form (#) not allowed with integer" | 
 | 896 |                             " format specifier 'c'"); | 
 | 897 |             goto done; | 
 | 898 |         } | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 899 |  | 
 | 900 |         /* taken from unicodeobject.c formatchar() */ | 
 | 901 |         /* Integer input truncated to a character */ | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 902 |         x = PyLong_AsLong(value); | 
 | 903 |         if (x == -1 && PyErr_Occurred()) | 
 | 904 |             goto done; | 
 | 905 |         if (x < 0 || x > 0x10ffff) { | 
 | 906 |             PyErr_SetString(PyExc_OverflowError, | 
| Victor Stinner | a4ac600 | 2012-01-21 15:50:49 +0100 | [diff] [blame] | 907 |                             "%c arg not in range(0x110000)"); | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 908 |             goto done; | 
 | 909 |         } | 
 | 910 |         tmp = PyUnicode_FromOrdinal(x); | 
 | 911 |         inumeric_chars = 0; | 
 | 912 |         n_digits = 1; | 
| Amaury Forgeot d'Arc | 6d766fc | 2012-01-23 23:20:43 +0100 | [diff] [blame] | 913 |         maxchar = Py_MAX(maxchar, (Py_UCS4)x); | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 914 |  | 
 | 915 |         /* As a sort-of hack, we tell calc_number_widths that we only | 
 | 916 |            have "remainder" characters. calc_number_widths thinks | 
 | 917 |            these are characters that don't get formatted, only copied | 
 | 918 |            into the output string. We do this for 'c' formatting, | 
 | 919 |            because the characters are likely to be non-digits. */ | 
 | 920 |         n_remainder = 1; | 
 | 921 |     } | 
 | 922 |     else { | 
 | 923 |         int base; | 
 | 924 |         int leading_chars_to_skip = 0;  /* Number of characters added by | 
 | 925 |                                            PyNumber_ToBase that we want to | 
 | 926 |                                            skip over. */ | 
 | 927 |  | 
 | 928 |         /* Compute the base and how many characters will be added by | 
 | 929 |            PyNumber_ToBase */ | 
 | 930 |         switch (format->type) { | 
 | 931 |         case 'b': | 
 | 932 |             base = 2; | 
 | 933 |             leading_chars_to_skip = 2; /* 0b */ | 
 | 934 |             break; | 
 | 935 |         case 'o': | 
 | 936 |             base = 8; | 
 | 937 |             leading_chars_to_skip = 2; /* 0o */ | 
 | 938 |             break; | 
 | 939 |         case 'x': | 
 | 940 |         case 'X': | 
 | 941 |             base = 16; | 
 | 942 |             leading_chars_to_skip = 2; /* 0x */ | 
 | 943 |             break; | 
 | 944 |         default:  /* shouldn't be needed, but stops a compiler warning */ | 
 | 945 |         case 'd': | 
 | 946 |         case 'n': | 
 | 947 |             base = 10; | 
 | 948 |             break; | 
 | 949 |         } | 
 | 950 |  | 
| Victor Stinner | d3f0882 | 2012-05-29 12:57:52 +0200 | [diff] [blame] | 951 |         if (format->sign != '+' && format->sign != ' ' | 
 | 952 |             && format->width == -1 | 
 | 953 |             && format->type != 'X' && format->type != 'n' | 
 | 954 |             && !format->thousands_separators | 
 | 955 |             && PyLong_CheckExact(value)) | 
 | 956 |         { | 
 | 957 |             /* Fast path */ | 
 | 958 |             return _PyLong_FormatWriter(writer, value, base, format->alternate); | 
 | 959 |         } | 
 | 960 |  | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 961 |         /* The number of prefix chars is the same as the leading | 
 | 962 |            chars to skip */ | 
 | 963 |         if (format->alternate) | 
 | 964 |             n_prefix = leading_chars_to_skip; | 
 | 965 |  | 
 | 966 |         /* Do the hard part, converting to a string in a given base */ | 
| Victor Stinner | d3f0882 | 2012-05-29 12:57:52 +0200 | [diff] [blame] | 967 |         tmp = _PyLong_Format(value, base); | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 968 |         if (tmp == NULL || PyUnicode_READY(tmp) == -1) | 
 | 969 |             goto done; | 
 | 970 |  | 
 | 971 |         inumeric_chars = 0; | 
 | 972 |         n_digits = PyUnicode_GET_LENGTH(tmp); | 
 | 973 |  | 
 | 974 |         prefix = inumeric_chars; | 
 | 975 |  | 
 | 976 |         /* Is a sign character present in the output?  If so, remember it | 
 | 977 |            and skip it */ | 
 | 978 |         if (PyUnicode_READ_CHAR(tmp, inumeric_chars) == '-') { | 
 | 979 |             sign_char = '-'; | 
 | 980 |             ++prefix; | 
 | 981 |             ++leading_chars_to_skip; | 
 | 982 |         } | 
 | 983 |  | 
 | 984 |         /* Skip over the leading chars (0x, 0b, etc.) */ | 
 | 985 |         n_digits -= leading_chars_to_skip; | 
 | 986 |         inumeric_chars += leading_chars_to_skip; | 
 | 987 |     } | 
 | 988 |  | 
 | 989 |     /* Determine the grouping, separator, and decimal point, if any. */ | 
| Victor Stinner | 41a863c | 2012-02-24 00:37:51 +0100 | [diff] [blame] | 990 |     if (get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE : | 
| Eric V. Smith | 89e1b1a | 2016-09-09 23:06:47 -0400 | [diff] [blame] | 991 |                         format->thousands_separators, | 
| Victor Stinner | 41a863c | 2012-02-24 00:37:51 +0100 | [diff] [blame] | 992 |                         &locale) == -1) | 
 | 993 |         goto done; | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 994 |  | 
 | 995 |     /* Calculate how much memory we'll need. */ | 
 | 996 |     n_total = calc_number_widths(&spec, n_prefix, sign_char, tmp, inumeric_chars, | 
| Victor Stinner | 41a863c | 2012-02-24 00:37:51 +0100 | [diff] [blame] | 997 |                                  inumeric_chars + n_digits, n_remainder, 0, | 
 | 998 |                                  &locale, format, &maxchar); | 
| Victor Stinner | a4ac600 | 2012-01-21 15:50:49 +0100 | [diff] [blame] | 999 |  | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 1000 |     /* Allocate the memory. */ | 
| Victor Stinner | d3f0882 | 2012-05-29 12:57:52 +0200 | [diff] [blame] | 1001 |     if (_PyUnicodeWriter_Prepare(writer, n_total, maxchar) == -1) | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 1002 |         goto done; | 
 | 1003 |  | 
 | 1004 |     /* Populate the memory. */ | 
| Victor Stinner | d3f0882 | 2012-05-29 12:57:52 +0200 | [diff] [blame] | 1005 |     result = fill_number(writer, &spec, | 
 | 1006 |                          tmp, inumeric_chars, inumeric_chars + n_digits, | 
| Eric V. Smith | 2ea9712 | 2014-04-14 11:55:10 -0400 | [diff] [blame] | 1007 |                          tmp, prefix, format->fill_char, | 
| Victor Stinner | d3f0882 | 2012-05-29 12:57:52 +0200 | [diff] [blame] | 1008 |                          &locale, format->type == 'X'); | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 1009 |  | 
 | 1010 | done: | 
 | 1011 |     Py_XDECREF(tmp); | 
| Victor Stinner | 41a863c | 2012-02-24 00:37:51 +0100 | [diff] [blame] | 1012 |     free_locale_info(&locale); | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 1013 |     return result; | 
 | 1014 | } | 
 | 1015 |  | 
 | 1016 | /************************************************************************/ | 
 | 1017 | /*********** float formatting *******************************************/ | 
 | 1018 | /************************************************************************/ | 
 | 1019 |  | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 1020 | /* much of this is taken from unicodeobject.c */ | 
| Victor Stinner | d3f0882 | 2012-05-29 12:57:52 +0200 | [diff] [blame] | 1021 | static int | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 1022 | format_float_internal(PyObject *value, | 
| Victor Stinner | d3f0882 | 2012-05-29 12:57:52 +0200 | [diff] [blame] | 1023 |                       const InternalFormatSpec *format, | 
 | 1024 |                       _PyUnicodeWriter *writer) | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 1025 | { | 
 | 1026 |     char *buf = NULL;       /* buffer returned from PyOS_double_to_string */ | 
 | 1027 |     Py_ssize_t n_digits; | 
 | 1028 |     Py_ssize_t n_remainder; | 
 | 1029 |     Py_ssize_t n_total; | 
 | 1030 |     int has_decimal; | 
 | 1031 |     double val; | 
| Victor Stinner | 76d3850 | 2013-06-24 23:34:15 +0200 | [diff] [blame] | 1032 |     int precision, default_precision = 6; | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 1033 |     Py_UCS4 type = format->type; | 
 | 1034 |     int add_pct = 0; | 
 | 1035 |     Py_ssize_t index; | 
 | 1036 |     NumberFieldWidths spec; | 
 | 1037 |     int flags = 0; | 
| Victor Stinner | d3f0882 | 2012-05-29 12:57:52 +0200 | [diff] [blame] | 1038 |     int result = -1; | 
| Amaury Forgeot d'Arc | cd27df3 | 2012-01-23 22:42:19 +0100 | [diff] [blame] | 1039 |     Py_UCS4 maxchar = 127; | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 1040 |     Py_UCS4 sign_char = '\0'; | 
 | 1041 |     int float_type; /* Used to see if we have a nan, inf, or regular float. */ | 
 | 1042 |     PyObject *unicode_tmp = NULL; | 
 | 1043 |  | 
 | 1044 |     /* Locale settings, either from the actual locale or | 
 | 1045 |        from a hard-code pseudo-locale */ | 
| Victor Stinner | 41a863c | 2012-02-24 00:37:51 +0100 | [diff] [blame] | 1046 |     LocaleInfo locale = STATIC_LOCALE_INFO_INIT; | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 1047 |  | 
| Victor Stinner | 2f084ec | 2013-06-23 14:54:30 +0200 | [diff] [blame] | 1048 |     if (format->precision > INT_MAX) { | 
 | 1049 |         PyErr_SetString(PyExc_ValueError, "precision too big"); | 
 | 1050 |         goto done; | 
 | 1051 |     } | 
 | 1052 |     precision = (int)format->precision; | 
 | 1053 |  | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 1054 |     if (format->alternate) | 
 | 1055 |         flags |= Py_DTSF_ALT; | 
 | 1056 |  | 
 | 1057 |     if (type == '\0') { | 
 | 1058 |         /* Omitted type specifier.  Behaves in the same way as repr(x) | 
 | 1059 |            and str(x) if no precision is given, else like 'g', but with | 
 | 1060 |            at least one digit after the decimal point. */ | 
 | 1061 |         flags |= Py_DTSF_ADD_DOT_0; | 
 | 1062 |         type = 'r'; | 
 | 1063 |         default_precision = 0; | 
 | 1064 |     } | 
 | 1065 |  | 
 | 1066 |     if (type == 'n') | 
 | 1067 |         /* 'n' is the same as 'g', except for the locale used to | 
 | 1068 |            format the result. We take care of that later. */ | 
 | 1069 |         type = 'g'; | 
 | 1070 |  | 
 | 1071 |     val = PyFloat_AsDouble(value); | 
 | 1072 |     if (val == -1.0 && PyErr_Occurred()) | 
 | 1073 |         goto done; | 
 | 1074 |  | 
 | 1075 |     if (type == '%') { | 
 | 1076 |         type = 'f'; | 
 | 1077 |         val *= 100; | 
 | 1078 |         add_pct = 1; | 
 | 1079 |     } | 
 | 1080 |  | 
 | 1081 |     if (precision < 0) | 
 | 1082 |         precision = default_precision; | 
 | 1083 |     else if (type == 'r') | 
 | 1084 |         type = 'g'; | 
 | 1085 |  | 
| Martin Panter | 4c35964 | 2016-05-08 13:53:41 +0000 | [diff] [blame] | 1086 |     /* Cast "type", because if we're in unicode we need to pass an | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 1087 |        8-bit char. This is safe, because we've restricted what "type" | 
 | 1088 |        can be. */ | 
 | 1089 |     buf = PyOS_double_to_string(val, (char)type, precision, flags, | 
 | 1090 |                                 &float_type); | 
 | 1091 |     if (buf == NULL) | 
 | 1092 |         goto done; | 
 | 1093 |     n_digits = strlen(buf); | 
 | 1094 |  | 
 | 1095 |     if (add_pct) { | 
 | 1096 |         /* We know that buf has a trailing zero (since we just called | 
 | 1097 |            strlen() on it), and we don't use that fact any more. So we | 
 | 1098 |            can just write over the trailing zero. */ | 
 | 1099 |         buf[n_digits] = '%'; | 
 | 1100 |         n_digits += 1; | 
 | 1101 |     } | 
 | 1102 |  | 
| Victor Stinner | d3f0882 | 2012-05-29 12:57:52 +0200 | [diff] [blame] | 1103 |     if (format->sign != '+' && format->sign != ' ' | 
 | 1104 |         && format->width == -1 | 
 | 1105 |         && format->type != 'n' | 
 | 1106 |         && !format->thousands_separators) | 
 | 1107 |     { | 
 | 1108 |         /* Fast path */ | 
| Victor Stinner | 4a58707 | 2013-11-19 12:54:53 +0100 | [diff] [blame] | 1109 |         result = _PyUnicodeWriter_WriteASCIIString(writer, buf, n_digits); | 
 | 1110 |         PyMem_Free(buf); | 
| Victor Stinner | d3f0882 | 2012-05-29 12:57:52 +0200 | [diff] [blame] | 1111 |         return result; | 
 | 1112 |     } | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 1113 |  | 
| Victor Stinner | 4a58707 | 2013-11-19 12:54:53 +0100 | [diff] [blame] | 1114 |     /* Since there is no unicode version of PyOS_double_to_string, | 
 | 1115 |        just use the 8 bit version and then convert to unicode. */ | 
 | 1116 |     unicode_tmp = _PyUnicode_FromASCII(buf, n_digits); | 
 | 1117 |     PyMem_Free(buf); | 
 | 1118 |     if (unicode_tmp == NULL) | 
 | 1119 |         goto done; | 
 | 1120 |  | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 1121 |     /* Is a sign character present in the output?  If so, remember it | 
 | 1122 |        and skip it */ | 
| Victor Stinner | d3f0882 | 2012-05-29 12:57:52 +0200 | [diff] [blame] | 1123 |     index = 0; | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 1124 |     if (PyUnicode_READ_CHAR(unicode_tmp, index) == '-') { | 
 | 1125 |         sign_char = '-'; | 
 | 1126 |         ++index; | 
 | 1127 |         --n_digits; | 
 | 1128 |     } | 
 | 1129 |  | 
 | 1130 |     /* Determine if we have any "remainder" (after the digits, might include | 
 | 1131 |        decimal or exponent or both (or neither)) */ | 
 | 1132 |     parse_number(unicode_tmp, index, index + n_digits, &n_remainder, &has_decimal); | 
 | 1133 |  | 
 | 1134 |     /* Determine the grouping, separator, and decimal point, if any. */ | 
| Victor Stinner | 41a863c | 2012-02-24 00:37:51 +0100 | [diff] [blame] | 1135 |     if (get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE : | 
| Eric V. Smith | 89e1b1a | 2016-09-09 23:06:47 -0400 | [diff] [blame] | 1136 |                         format->thousands_separators, | 
| Victor Stinner | 41a863c | 2012-02-24 00:37:51 +0100 | [diff] [blame] | 1137 |                         &locale) == -1) | 
 | 1138 |         goto done; | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 1139 |  | 
 | 1140 |     /* Calculate how much memory we'll need. */ | 
| Victor Stinner | afbaa20 | 2011-09-28 21:50:16 +0200 | [diff] [blame] | 1141 |     n_total = calc_number_widths(&spec, 0, sign_char, unicode_tmp, index, | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 1142 |                                  index + n_digits, n_remainder, has_decimal, | 
| Victor Stinner | 41a863c | 2012-02-24 00:37:51 +0100 | [diff] [blame] | 1143 |                                  &locale, format, &maxchar); | 
| Victor Stinner | a4ac600 | 2012-01-21 15:50:49 +0100 | [diff] [blame] | 1144 |  | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 1145 |     /* Allocate the memory. */ | 
| Victor Stinner | d3f0882 | 2012-05-29 12:57:52 +0200 | [diff] [blame] | 1146 |     if (_PyUnicodeWriter_Prepare(writer, n_total, maxchar) == -1) | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 1147 |         goto done; | 
 | 1148 |  | 
 | 1149 |     /* Populate the memory. */ | 
| Victor Stinner | d3f0882 | 2012-05-29 12:57:52 +0200 | [diff] [blame] | 1150 |     result = fill_number(writer, &spec, | 
 | 1151 |                          unicode_tmp, index, index + n_digits, | 
| Eric V. Smith | 2ea9712 | 2014-04-14 11:55:10 -0400 | [diff] [blame] | 1152 |                          NULL, 0, format->fill_char, | 
| Victor Stinner | d3f0882 | 2012-05-29 12:57:52 +0200 | [diff] [blame] | 1153 |                          &locale, 0); | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 1154 |  | 
 | 1155 | done: | 
| Stefan Krah | d9c1bf7 | 2012-09-06 13:02:46 +0200 | [diff] [blame] | 1156 |     Py_XDECREF(unicode_tmp); | 
| Victor Stinner | 41a863c | 2012-02-24 00:37:51 +0100 | [diff] [blame] | 1157 |     free_locale_info(&locale); | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 1158 |     return result; | 
 | 1159 | } | 
 | 1160 |  | 
 | 1161 | /************************************************************************/ | 
 | 1162 | /*********** complex formatting *****************************************/ | 
 | 1163 | /************************************************************************/ | 
 | 1164 |  | 
| Victor Stinner | d3f0882 | 2012-05-29 12:57:52 +0200 | [diff] [blame] | 1165 | static int | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 1166 | format_complex_internal(PyObject *value, | 
| Victor Stinner | d3f0882 | 2012-05-29 12:57:52 +0200 | [diff] [blame] | 1167 |                         const InternalFormatSpec *format, | 
 | 1168 |                         _PyUnicodeWriter *writer) | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 1169 | { | 
 | 1170 |     double re; | 
 | 1171 |     double im; | 
 | 1172 |     char *re_buf = NULL;       /* buffer returned from PyOS_double_to_string */ | 
 | 1173 |     char *im_buf = NULL;       /* buffer returned from PyOS_double_to_string */ | 
 | 1174 |  | 
 | 1175 |     InternalFormatSpec tmp_format = *format; | 
 | 1176 |     Py_ssize_t n_re_digits; | 
 | 1177 |     Py_ssize_t n_im_digits; | 
 | 1178 |     Py_ssize_t n_re_remainder; | 
 | 1179 |     Py_ssize_t n_im_remainder; | 
 | 1180 |     Py_ssize_t n_re_total; | 
 | 1181 |     Py_ssize_t n_im_total; | 
 | 1182 |     int re_has_decimal; | 
 | 1183 |     int im_has_decimal; | 
| Victor Stinner | 76d3850 | 2013-06-24 23:34:15 +0200 | [diff] [blame] | 1184 |     int precision, default_precision = 6; | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 1185 |     Py_UCS4 type = format->type; | 
 | 1186 |     Py_ssize_t i_re; | 
 | 1187 |     Py_ssize_t i_im; | 
 | 1188 |     NumberFieldWidths re_spec; | 
 | 1189 |     NumberFieldWidths im_spec; | 
 | 1190 |     int flags = 0; | 
| Victor Stinner | d3f0882 | 2012-05-29 12:57:52 +0200 | [diff] [blame] | 1191 |     int result = -1; | 
| Amaury Forgeot d'Arc | cd27df3 | 2012-01-23 22:42:19 +0100 | [diff] [blame] | 1192 |     Py_UCS4 maxchar = 127; | 
| Victor Stinner | d3f0882 | 2012-05-29 12:57:52 +0200 | [diff] [blame] | 1193 |     enum PyUnicode_Kind rkind; | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 1194 |     void *rdata; | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 1195 |     Py_UCS4 re_sign_char = '\0'; | 
 | 1196 |     Py_UCS4 im_sign_char = '\0'; | 
 | 1197 |     int re_float_type; /* Used to see if we have a nan, inf, or regular float. */ | 
 | 1198 |     int im_float_type; | 
 | 1199 |     int add_parens = 0; | 
 | 1200 |     int skip_re = 0; | 
 | 1201 |     Py_ssize_t lpad; | 
 | 1202 |     Py_ssize_t rpad; | 
 | 1203 |     Py_ssize_t total; | 
 | 1204 |     PyObject *re_unicode_tmp = NULL; | 
 | 1205 |     PyObject *im_unicode_tmp = NULL; | 
 | 1206 |  | 
 | 1207 |     /* Locale settings, either from the actual locale or | 
 | 1208 |        from a hard-code pseudo-locale */ | 
| Victor Stinner | 41a863c | 2012-02-24 00:37:51 +0100 | [diff] [blame] | 1209 |     LocaleInfo locale = STATIC_LOCALE_INFO_INIT; | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 1210 |  | 
| Victor Stinner | 2f084ec | 2013-06-23 14:54:30 +0200 | [diff] [blame] | 1211 |     if (format->precision > INT_MAX) { | 
 | 1212 |         PyErr_SetString(PyExc_ValueError, "precision too big"); | 
 | 1213 |         goto done; | 
 | 1214 |     } | 
 | 1215 |     precision = (int)format->precision; | 
 | 1216 |  | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 1217 |     /* Zero padding is not allowed. */ | 
 | 1218 |     if (format->fill_char == '0') { | 
 | 1219 |         PyErr_SetString(PyExc_ValueError, | 
 | 1220 |                         "Zero padding is not allowed in complex format " | 
 | 1221 |                         "specifier"); | 
 | 1222 |         goto done; | 
 | 1223 |     } | 
 | 1224 |  | 
 | 1225 |     /* Neither is '=' alignment . */ | 
 | 1226 |     if (format->align == '=') { | 
 | 1227 |         PyErr_SetString(PyExc_ValueError, | 
 | 1228 |                         "'=' alignment flag is not allowed in complex format " | 
 | 1229 |                         "specifier"); | 
 | 1230 |         goto done; | 
 | 1231 |     } | 
 | 1232 |  | 
 | 1233 |     re = PyComplex_RealAsDouble(value); | 
 | 1234 |     if (re == -1.0 && PyErr_Occurred()) | 
 | 1235 |         goto done; | 
 | 1236 |     im = PyComplex_ImagAsDouble(value); | 
 | 1237 |     if (im == -1.0 && PyErr_Occurred()) | 
 | 1238 |         goto done; | 
 | 1239 |  | 
 | 1240 |     if (format->alternate) | 
 | 1241 |         flags |= Py_DTSF_ALT; | 
 | 1242 |  | 
 | 1243 |     if (type == '\0') { | 
 | 1244 |         /* Omitted type specifier. Should be like str(self). */ | 
 | 1245 |         type = 'r'; | 
 | 1246 |         default_precision = 0; | 
 | 1247 |         if (re == 0.0 && copysign(1.0, re) == 1.0) | 
 | 1248 |             skip_re = 1; | 
 | 1249 |         else | 
 | 1250 |             add_parens = 1; | 
 | 1251 |     } | 
 | 1252 |  | 
 | 1253 |     if (type == 'n') | 
 | 1254 |         /* 'n' is the same as 'g', except for the locale used to | 
 | 1255 |            format the result. We take care of that later. */ | 
 | 1256 |         type = 'g'; | 
 | 1257 |  | 
 | 1258 |     if (precision < 0) | 
 | 1259 |         precision = default_precision; | 
 | 1260 |     else if (type == 'r') | 
 | 1261 |         type = 'g'; | 
 | 1262 |  | 
| Martin Panter | 4c35964 | 2016-05-08 13:53:41 +0000 | [diff] [blame] | 1263 |     /* Cast "type", because if we're in unicode we need to pass an | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 1264 |        8-bit char. This is safe, because we've restricted what "type" | 
 | 1265 |        can be. */ | 
 | 1266 |     re_buf = PyOS_double_to_string(re, (char)type, precision, flags, | 
 | 1267 |                                    &re_float_type); | 
 | 1268 |     if (re_buf == NULL) | 
 | 1269 |         goto done; | 
 | 1270 |     im_buf = PyOS_double_to_string(im, (char)type, precision, flags, | 
 | 1271 |                                    &im_float_type); | 
 | 1272 |     if (im_buf == NULL) | 
 | 1273 |         goto done; | 
 | 1274 |  | 
 | 1275 |     n_re_digits = strlen(re_buf); | 
 | 1276 |     n_im_digits = strlen(im_buf); | 
 | 1277 |  | 
 | 1278 |     /* Since there is no unicode version of PyOS_double_to_string, | 
 | 1279 |        just use the 8 bit version and then convert to unicode. */ | 
| Victor Stinner | d3f0882 | 2012-05-29 12:57:52 +0200 | [diff] [blame] | 1280 |     re_unicode_tmp = _PyUnicode_FromASCII(re_buf, n_re_digits); | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 1281 |     if (re_unicode_tmp == NULL) | 
 | 1282 |         goto done; | 
 | 1283 |     i_re = 0; | 
 | 1284 |  | 
| Victor Stinner | d3f0882 | 2012-05-29 12:57:52 +0200 | [diff] [blame] | 1285 |     im_unicode_tmp = _PyUnicode_FromASCII(im_buf, n_im_digits); | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 1286 |     if (im_unicode_tmp == NULL) | 
 | 1287 |         goto done; | 
 | 1288 |     i_im = 0; | 
 | 1289 |  | 
 | 1290 |     /* Is a sign character present in the output?  If so, remember it | 
 | 1291 |        and skip it */ | 
 | 1292 |     if (PyUnicode_READ_CHAR(re_unicode_tmp, i_re) == '-') { | 
 | 1293 |         re_sign_char = '-'; | 
 | 1294 |         ++i_re; | 
 | 1295 |         --n_re_digits; | 
 | 1296 |     } | 
 | 1297 |     if (PyUnicode_READ_CHAR(im_unicode_tmp, i_im) == '-') { | 
 | 1298 |         im_sign_char = '-'; | 
 | 1299 |         ++i_im; | 
 | 1300 |         --n_im_digits; | 
 | 1301 |     } | 
 | 1302 |  | 
 | 1303 |     /* Determine if we have any "remainder" (after the digits, might include | 
 | 1304 |        decimal or exponent or both (or neither)) */ | 
| Victor Stinner | afbaa20 | 2011-09-28 21:50:16 +0200 | [diff] [blame] | 1305 |     parse_number(re_unicode_tmp, i_re, i_re + n_re_digits, | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 1306 |                  &n_re_remainder, &re_has_decimal); | 
| Victor Stinner | afbaa20 | 2011-09-28 21:50:16 +0200 | [diff] [blame] | 1307 |     parse_number(im_unicode_tmp, i_im, i_im + n_im_digits, | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 1308 |                  &n_im_remainder, &im_has_decimal); | 
 | 1309 |  | 
 | 1310 |     /* Determine the grouping, separator, and decimal point, if any. */ | 
| Victor Stinner | 41a863c | 2012-02-24 00:37:51 +0100 | [diff] [blame] | 1311 |     if (get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE : | 
| Eric V. Smith | 89e1b1a | 2016-09-09 23:06:47 -0400 | [diff] [blame] | 1312 |                         format->thousands_separators, | 
| Victor Stinner | 41a863c | 2012-02-24 00:37:51 +0100 | [diff] [blame] | 1313 |                         &locale) == -1) | 
 | 1314 |         goto done; | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 1315 |  | 
 | 1316 |     /* Turn off any padding. We'll do it later after we've composed | 
 | 1317 |        the numbers without padding. */ | 
 | 1318 |     tmp_format.fill_char = '\0'; | 
 | 1319 |     tmp_format.align = '<'; | 
 | 1320 |     tmp_format.width = -1; | 
 | 1321 |  | 
 | 1322 |     /* Calculate how much memory we'll need. */ | 
 | 1323 |     n_re_total = calc_number_widths(&re_spec, 0, re_sign_char, re_unicode_tmp, | 
 | 1324 |                                     i_re, i_re + n_re_digits, n_re_remainder, | 
| Victor Stinner | 41a863c | 2012-02-24 00:37:51 +0100 | [diff] [blame] | 1325 |                                     re_has_decimal, &locale, &tmp_format, | 
 | 1326 |                                     &maxchar); | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 1327 |  | 
 | 1328 |     /* Same formatting, but always include a sign, unless the real part is | 
 | 1329 |      * going to be omitted, in which case we use whatever sign convention was | 
 | 1330 |      * requested by the original format. */ | 
 | 1331 |     if (!skip_re) | 
 | 1332 |         tmp_format.sign = '+'; | 
 | 1333 |     n_im_total = calc_number_widths(&im_spec, 0, im_sign_char, im_unicode_tmp, | 
 | 1334 |                                     i_im, i_im + n_im_digits, n_im_remainder, | 
| Victor Stinner | 41a863c | 2012-02-24 00:37:51 +0100 | [diff] [blame] | 1335 |                                     im_has_decimal, &locale, &tmp_format, | 
 | 1336 |                                     &maxchar); | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 1337 |  | 
 | 1338 |     if (skip_re) | 
 | 1339 |         n_re_total = 0; | 
 | 1340 |  | 
 | 1341 |     /* Add 1 for the 'j', and optionally 2 for parens. */ | 
 | 1342 |     calc_padding(n_re_total + n_im_total + 1 + add_parens * 2, | 
 | 1343 |                  format->width, format->align, &lpad, &rpad, &total); | 
 | 1344 |  | 
| Victor Stinner | 41a863c | 2012-02-24 00:37:51 +0100 | [diff] [blame] | 1345 |     if (lpad || rpad) | 
| Victor Stinner | a4ac600 | 2012-01-21 15:50:49 +0100 | [diff] [blame] | 1346 |         maxchar = Py_MAX(maxchar, format->fill_char); | 
 | 1347 |  | 
| Victor Stinner | d3f0882 | 2012-05-29 12:57:52 +0200 | [diff] [blame] | 1348 |     if (_PyUnicodeWriter_Prepare(writer, total, maxchar) == -1) | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 1349 |         goto done; | 
| Victor Stinner | d3f0882 | 2012-05-29 12:57:52 +0200 | [diff] [blame] | 1350 |     rkind = writer->kind; | 
 | 1351 |     rdata = writer->data; | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 1352 |  | 
 | 1353 |     /* Populate the memory. First, the padding. */ | 
| Victor Stinner | d3f0882 | 2012-05-29 12:57:52 +0200 | [diff] [blame] | 1354 |     result = fill_padding(writer, | 
 | 1355 |                           n_re_total + n_im_total + 1 + add_parens * 2, | 
| Eric V. Smith | 2ea9712 | 2014-04-14 11:55:10 -0400 | [diff] [blame] | 1356 |                           format->fill_char, lpad, rpad); | 
| Victor Stinner | d3f0882 | 2012-05-29 12:57:52 +0200 | [diff] [blame] | 1357 |     if (result == -1) | 
 | 1358 |         goto done; | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 1359 |  | 
| Victor Stinner | d3f0882 | 2012-05-29 12:57:52 +0200 | [diff] [blame] | 1360 |     if (add_parens) { | 
 | 1361 |         PyUnicode_WRITE(rkind, rdata, writer->pos, '('); | 
 | 1362 |         writer->pos++; | 
 | 1363 |     } | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 1364 |  | 
 | 1365 |     if (!skip_re) { | 
| Victor Stinner | d3f0882 | 2012-05-29 12:57:52 +0200 | [diff] [blame] | 1366 |         result = fill_number(writer, &re_spec, | 
 | 1367 |                              re_unicode_tmp, i_re, i_re + n_re_digits, | 
 | 1368 |                              NULL, 0, | 
 | 1369 |                              0, | 
 | 1370 |                              &locale, 0); | 
 | 1371 |         if (result == -1) | 
| Victor Stinner | afbaa20 | 2011-09-28 21:50:16 +0200 | [diff] [blame] | 1372 |             goto done; | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 1373 |     } | 
| Victor Stinner | d3f0882 | 2012-05-29 12:57:52 +0200 | [diff] [blame] | 1374 |     result = fill_number(writer, &im_spec, | 
 | 1375 |                          im_unicode_tmp, i_im, i_im + n_im_digits, | 
 | 1376 |                          NULL, 0, | 
 | 1377 |                          0, | 
 | 1378 |                          &locale, 0); | 
 | 1379 |     if (result == -1) | 
| Victor Stinner | afbaa20 | 2011-09-28 21:50:16 +0200 | [diff] [blame] | 1380 |         goto done; | 
| Victor Stinner | d3f0882 | 2012-05-29 12:57:52 +0200 | [diff] [blame] | 1381 |     PyUnicode_WRITE(rkind, rdata, writer->pos, 'j'); | 
 | 1382 |     writer->pos++; | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 1383 |  | 
| Victor Stinner | d3f0882 | 2012-05-29 12:57:52 +0200 | [diff] [blame] | 1384 |     if (add_parens) { | 
 | 1385 |         PyUnicode_WRITE(rkind, rdata, writer->pos, ')'); | 
 | 1386 |         writer->pos++; | 
 | 1387 |     } | 
 | 1388 |  | 
 | 1389 |     writer->pos += rpad; | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 1390 |  | 
 | 1391 | done: | 
 | 1392 |     PyMem_Free(re_buf); | 
 | 1393 |     PyMem_Free(im_buf); | 
 | 1394 |     Py_XDECREF(re_unicode_tmp); | 
 | 1395 |     Py_XDECREF(im_unicode_tmp); | 
| Victor Stinner | 41a863c | 2012-02-24 00:37:51 +0100 | [diff] [blame] | 1396 |     free_locale_info(&locale); | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 1397 |     return result; | 
 | 1398 | } | 
 | 1399 |  | 
 | 1400 | /************************************************************************/ | 
 | 1401 | /*********** built in formatters ****************************************/ | 
 | 1402 | /************************************************************************/ | 
| doko@ubuntu.com | 39378f7 | 2012-06-21 12:12:20 +0200 | [diff] [blame] | 1403 | static int | 
| Victor Stinner | d3f0882 | 2012-05-29 12:57:52 +0200 | [diff] [blame] | 1404 | format_obj(PyObject *obj, _PyUnicodeWriter *writer) | 
 | 1405 | { | 
 | 1406 |     PyObject *str; | 
 | 1407 |     int err; | 
 | 1408 |  | 
 | 1409 |     str = PyObject_Str(obj); | 
 | 1410 |     if (str == NULL) | 
 | 1411 |         return -1; | 
 | 1412 |     err = _PyUnicodeWriter_WriteStr(writer, str); | 
 | 1413 |     Py_DECREF(str); | 
 | 1414 |     return err; | 
 | 1415 | } | 
 | 1416 |  | 
 | 1417 | int | 
 | 1418 | _PyUnicode_FormatAdvancedWriter(_PyUnicodeWriter *writer, | 
 | 1419 |                                 PyObject *obj, | 
 | 1420 |                                 PyObject *format_spec, | 
 | 1421 |                                 Py_ssize_t start, Py_ssize_t end) | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 1422 | { | 
 | 1423 |     InternalFormatSpec format; | 
| Victor Stinner | d3f0882 | 2012-05-29 12:57:52 +0200 | [diff] [blame] | 1424 |  | 
 | 1425 |     assert(PyUnicode_Check(obj)); | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 1426 |  | 
 | 1427 |     /* check for the special case of zero length format spec, make | 
 | 1428 |        it equivalent to str(obj) */ | 
| Victor Stinner | d3f0882 | 2012-05-29 12:57:52 +0200 | [diff] [blame] | 1429 |     if (start == end) { | 
 | 1430 |         if (PyUnicode_CheckExact(obj)) | 
 | 1431 |             return _PyUnicodeWriter_WriteStr(writer, obj); | 
 | 1432 |         else | 
 | 1433 |             return format_obj(obj, writer); | 
 | 1434 |     } | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 1435 |  | 
 | 1436 |     /* parse the format_spec */ | 
 | 1437 |     if (!parse_internal_render_format_spec(format_spec, start, end, | 
 | 1438 |                                            &format, 's', '<')) | 
| Victor Stinner | d3f0882 | 2012-05-29 12:57:52 +0200 | [diff] [blame] | 1439 |         return -1; | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 1440 |  | 
 | 1441 |     /* type conversion? */ | 
 | 1442 |     switch (format.type) { | 
 | 1443 |     case 's': | 
 | 1444 |         /* no type conversion needed, already a string.  do the formatting */ | 
| Victor Stinner | d3f0882 | 2012-05-29 12:57:52 +0200 | [diff] [blame] | 1445 |         return format_string_internal(obj, &format, writer); | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 1446 |     default: | 
 | 1447 |         /* unknown */ | 
 | 1448 |         unknown_presentation_type(format.type, obj->ob_type->tp_name); | 
| Victor Stinner | d3f0882 | 2012-05-29 12:57:52 +0200 | [diff] [blame] | 1449 |         return -1; | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 1450 |     } | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 1451 | } | 
 | 1452 |  | 
| Victor Stinner | d3f0882 | 2012-05-29 12:57:52 +0200 | [diff] [blame] | 1453 | int | 
 | 1454 | _PyLong_FormatAdvancedWriter(_PyUnicodeWriter *writer, | 
 | 1455 |                              PyObject *obj, | 
 | 1456 |                              PyObject *format_spec, | 
 | 1457 |                              Py_ssize_t start, Py_ssize_t end) | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 1458 | { | 
| Victor Stinner | d3f0882 | 2012-05-29 12:57:52 +0200 | [diff] [blame] | 1459 |     PyObject *tmp = NULL, *str = NULL; | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 1460 |     InternalFormatSpec format; | 
| Victor Stinner | d3f0882 | 2012-05-29 12:57:52 +0200 | [diff] [blame] | 1461 |     int result = -1; | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 1462 |  | 
 | 1463 |     /* check for the special case of zero length format spec, make | 
 | 1464 |        it equivalent to str(obj) */ | 
 | 1465 |     if (start == end) { | 
| Victor Stinner | d3f0882 | 2012-05-29 12:57:52 +0200 | [diff] [blame] | 1466 |         if (PyLong_CheckExact(obj)) | 
 | 1467 |             return _PyLong_FormatWriter(writer, obj, 10, 0); | 
 | 1468 |         else | 
 | 1469 |             return format_obj(obj, writer); | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 1470 |     } | 
 | 1471 |  | 
 | 1472 |     /* parse the format_spec */ | 
 | 1473 |     if (!parse_internal_render_format_spec(format_spec, start, end, | 
 | 1474 |                                            &format, 'd', '>')) | 
 | 1475 |         goto done; | 
 | 1476 |  | 
 | 1477 |     /* type conversion? */ | 
 | 1478 |     switch (format.type) { | 
 | 1479 |     case 'b': | 
 | 1480 |     case 'c': | 
 | 1481 |     case 'd': | 
 | 1482 |     case 'o': | 
 | 1483 |     case 'x': | 
 | 1484 |     case 'X': | 
 | 1485 |     case 'n': | 
| Serhiy Storchaka | 9594942 | 2013-08-27 19:40:23 +0300 | [diff] [blame] | 1486 |         /* no type conversion needed, already an int.  do the formatting */ | 
| Victor Stinner | d3f0882 | 2012-05-29 12:57:52 +0200 | [diff] [blame] | 1487 |         result = format_long_internal(obj, &format, writer); | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 1488 |         break; | 
 | 1489 |  | 
 | 1490 |     case 'e': | 
 | 1491 |     case 'E': | 
 | 1492 |     case 'f': | 
 | 1493 |     case 'F': | 
 | 1494 |     case 'g': | 
 | 1495 |     case 'G': | 
 | 1496 |     case '%': | 
 | 1497 |         /* convert to float */ | 
 | 1498 |         tmp = PyNumber_Float(obj); | 
 | 1499 |         if (tmp == NULL) | 
 | 1500 |             goto done; | 
| Victor Stinner | d3f0882 | 2012-05-29 12:57:52 +0200 | [diff] [blame] | 1501 |         result = format_float_internal(tmp, &format, writer); | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 1502 |         break; | 
 | 1503 |  | 
 | 1504 |     default: | 
 | 1505 |         /* unknown */ | 
 | 1506 |         unknown_presentation_type(format.type, obj->ob_type->tp_name); | 
 | 1507 |         goto done; | 
 | 1508 |     } | 
 | 1509 |  | 
 | 1510 | done: | 
 | 1511 |     Py_XDECREF(tmp); | 
| Victor Stinner | d3f0882 | 2012-05-29 12:57:52 +0200 | [diff] [blame] | 1512 |     Py_XDECREF(str); | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 1513 |     return result; | 
 | 1514 | } | 
 | 1515 |  | 
| Victor Stinner | d3f0882 | 2012-05-29 12:57:52 +0200 | [diff] [blame] | 1516 | int | 
 | 1517 | _PyFloat_FormatAdvancedWriter(_PyUnicodeWriter *writer, | 
 | 1518 |                               PyObject *obj, | 
 | 1519 |                               PyObject *format_spec, | 
 | 1520 |                               Py_ssize_t start, Py_ssize_t end) | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 1521 | { | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 1522 |     InternalFormatSpec format; | 
 | 1523 |  | 
 | 1524 |     /* check for the special case of zero length format spec, make | 
 | 1525 |        it equivalent to str(obj) */ | 
| Victor Stinner | d3f0882 | 2012-05-29 12:57:52 +0200 | [diff] [blame] | 1526 |     if (start == end) | 
 | 1527 |         return format_obj(obj, writer); | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 1528 |  | 
 | 1529 |     /* parse the format_spec */ | 
 | 1530 |     if (!parse_internal_render_format_spec(format_spec, start, end, | 
 | 1531 |                                            &format, '\0', '>')) | 
| Victor Stinner | d3f0882 | 2012-05-29 12:57:52 +0200 | [diff] [blame] | 1532 |         return -1; | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 1533 |  | 
 | 1534 |     /* type conversion? */ | 
 | 1535 |     switch (format.type) { | 
 | 1536 |     case '\0': /* No format code: like 'g', but with at least one decimal. */ | 
 | 1537 |     case 'e': | 
 | 1538 |     case 'E': | 
 | 1539 |     case 'f': | 
 | 1540 |     case 'F': | 
 | 1541 |     case 'g': | 
 | 1542 |     case 'G': | 
 | 1543 |     case 'n': | 
 | 1544 |     case '%': | 
 | 1545 |         /* no conversion, already a float.  do the formatting */ | 
| Victor Stinner | d3f0882 | 2012-05-29 12:57:52 +0200 | [diff] [blame] | 1546 |         return format_float_internal(obj, &format, writer); | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 1547 |  | 
 | 1548 |     default: | 
 | 1549 |         /* unknown */ | 
 | 1550 |         unknown_presentation_type(format.type, obj->ob_type->tp_name); | 
| Victor Stinner | d3f0882 | 2012-05-29 12:57:52 +0200 | [diff] [blame] | 1551 |         return -1; | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 1552 |     } | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 1553 | } | 
 | 1554 |  | 
| Victor Stinner | d3f0882 | 2012-05-29 12:57:52 +0200 | [diff] [blame] | 1555 | int | 
 | 1556 | _PyComplex_FormatAdvancedWriter(_PyUnicodeWriter *writer, | 
 | 1557 |                                 PyObject *obj, | 
 | 1558 |                                 PyObject *format_spec, | 
 | 1559 |                                 Py_ssize_t start, Py_ssize_t end) | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 1560 | { | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 1561 |     InternalFormatSpec format; | 
 | 1562 |  | 
 | 1563 |     /* check for the special case of zero length format spec, make | 
 | 1564 |        it equivalent to str(obj) */ | 
| Victor Stinner | d3f0882 | 2012-05-29 12:57:52 +0200 | [diff] [blame] | 1565 |     if (start == end) | 
 | 1566 |         return format_obj(obj, writer); | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 1567 |  | 
 | 1568 |     /* parse the format_spec */ | 
 | 1569 |     if (!parse_internal_render_format_spec(format_spec, start, end, | 
 | 1570 |                                            &format, '\0', '>')) | 
| Victor Stinner | d3f0882 | 2012-05-29 12:57:52 +0200 | [diff] [blame] | 1571 |         return -1; | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 1572 |  | 
 | 1573 |     /* type conversion? */ | 
 | 1574 |     switch (format.type) { | 
 | 1575 |     case '\0': /* No format code: like 'g', but with at least one decimal. */ | 
 | 1576 |     case 'e': | 
 | 1577 |     case 'E': | 
 | 1578 |     case 'f': | 
 | 1579 |     case 'F': | 
 | 1580 |     case 'g': | 
 | 1581 |     case 'G': | 
 | 1582 |     case 'n': | 
 | 1583 |         /* no conversion, already a complex.  do the formatting */ | 
| Victor Stinner | d3f0882 | 2012-05-29 12:57:52 +0200 | [diff] [blame] | 1584 |         return format_complex_internal(obj, &format, writer); | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 1585 |  | 
 | 1586 |     default: | 
 | 1587 |         /* unknown */ | 
 | 1588 |         unknown_presentation_type(format.type, obj->ob_type->tp_name); | 
| Victor Stinner | d3f0882 | 2012-05-29 12:57:52 +0200 | [diff] [blame] | 1589 |         return -1; | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 1590 |     } | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 1591 | } |