| Eric Smith | 8c66326 | 2007-08-25 02:26:07 +0000 | [diff] [blame] | 1 | /* implements the string, long, and float formatters.  that is, | 
|  | 2 | string.__format__, etc. */ | 
|  | 3 |  | 
|  | 4 | /* Before including this, you must include either: | 
|  | 5 | stringlib/unicodedefs.h | 
|  | 6 | stringlib/stringdefs.h | 
|  | 7 |  | 
|  | 8 | Also, you should define the names: | 
|  | 9 | FORMAT_STRING | 
|  | 10 | FORMAT_LONG | 
|  | 11 | FORMAT_FLOAT | 
|  | 12 | to be whatever you want the public names of these functions to | 
|  | 13 | be.  These are the only non-static functions defined here. | 
|  | 14 | */ | 
|  | 15 |  | 
|  | 16 | /* | 
|  | 17 | get_integer consumes 0 or more decimal digit characters from an | 
|  | 18 | input string, updates *result with the corresponding positive | 
|  | 19 | integer, and returns the number of digits consumed. | 
|  | 20 |  | 
|  | 21 | returns -1 on error. | 
|  | 22 | */ | 
|  | 23 | static int | 
|  | 24 | get_integer(STRINGLIB_CHAR **ptr, STRINGLIB_CHAR *end, | 
|  | 25 | Py_ssize_t *result) | 
|  | 26 | { | 
|  | 27 | Py_ssize_t accumulator, digitval, oldaccumulator; | 
|  | 28 | int numdigits; | 
|  | 29 | accumulator = numdigits = 0; | 
|  | 30 | for (;;(*ptr)++, numdigits++) { | 
|  | 31 | if (*ptr >= end) | 
|  | 32 | break; | 
|  | 33 | digitval = STRINGLIB_TODECIMAL(**ptr); | 
|  | 34 | if (digitval < 0) | 
|  | 35 | break; | 
|  | 36 | /* | 
|  | 37 | This trick was copied from old Unicode format code.  It's cute, | 
|  | 38 | but would really suck on an old machine with a slow divide | 
|  | 39 | implementation.  Fortunately, in the normal case we do not | 
|  | 40 | expect too many digits. | 
|  | 41 | */ | 
|  | 42 | oldaccumulator = accumulator; | 
|  | 43 | accumulator *= 10; | 
|  | 44 | if ((accumulator+10)/10 != oldaccumulator+1) { | 
|  | 45 | PyErr_Format(PyExc_ValueError, | 
|  | 46 | "Too many decimal digits in format string"); | 
|  | 47 | return -1; | 
|  | 48 | } | 
|  | 49 | accumulator += digitval; | 
|  | 50 | } | 
|  | 51 | *result = accumulator; | 
|  | 52 | return numdigits; | 
|  | 53 | } | 
|  | 54 |  | 
|  | 55 | /************************************************************************/ | 
|  | 56 | /*********** standard format specifier parsing **************************/ | 
|  | 57 | /************************************************************************/ | 
|  | 58 |  | 
|  | 59 | /* returns true if this character is a specifier alignment token */ | 
|  | 60 | Py_LOCAL_INLINE(int) | 
|  | 61 | is_alignment_token(STRINGLIB_CHAR c) | 
|  | 62 | { | 
|  | 63 | switch (c) { | 
|  | 64 | case '<': case '>': case '=': case '^': | 
|  | 65 | return 1; | 
|  | 66 | default: | 
|  | 67 | return 0; | 
|  | 68 | } | 
|  | 69 | } | 
|  | 70 |  | 
|  | 71 | /* returns true if this character is a sign element */ | 
|  | 72 | Py_LOCAL_INLINE(int) | 
|  | 73 | is_sign_element(STRINGLIB_CHAR c) | 
|  | 74 | { | 
|  | 75 | switch (c) { | 
|  | 76 | case ' ': case '+': case '-': case '(': | 
|  | 77 | return 1; | 
|  | 78 | default: | 
|  | 79 | return 0; | 
|  | 80 | } | 
|  | 81 | } | 
|  | 82 |  | 
|  | 83 |  | 
|  | 84 | typedef struct { | 
|  | 85 | STRINGLIB_CHAR fill_char; | 
|  | 86 | STRINGLIB_CHAR align; | 
|  | 87 | STRINGLIB_CHAR sign; | 
|  | 88 | Py_ssize_t width; | 
|  | 89 | Py_ssize_t precision; | 
|  | 90 | STRINGLIB_CHAR type; | 
|  | 91 | } InternalFormatSpec; | 
|  | 92 |  | 
|  | 93 | /* | 
|  | 94 | ptr points to the start of the format_spec, end points just past its end. | 
|  | 95 | fills in format with the parsed information. | 
|  | 96 | returns 1 on success, 0 on failure. | 
|  | 97 | if failure, sets the exception | 
|  | 98 | */ | 
|  | 99 | static int | 
|  | 100 | parse_internal_render_format_spec(PyObject *format_spec, | 
|  | 101 | InternalFormatSpec *format, | 
|  | 102 | char default_type) | 
|  | 103 | { | 
|  | 104 | STRINGLIB_CHAR *ptr = STRINGLIB_STR(format_spec); | 
|  | 105 | STRINGLIB_CHAR *end = ptr + STRINGLIB_LEN(format_spec); | 
|  | 106 |  | 
|  | 107 | /* end-ptr is used throughout this code to specify the length of | 
|  | 108 | the input string */ | 
|  | 109 |  | 
|  | 110 | Py_ssize_t specified_width; | 
|  | 111 |  | 
|  | 112 | format->fill_char = '\0'; | 
|  | 113 | format->align = '\0'; | 
|  | 114 | format->sign = '\0'; | 
|  | 115 | format->width = -1; | 
|  | 116 | format->precision = -1; | 
|  | 117 | format->type = default_type; | 
|  | 118 |  | 
|  | 119 | /* If the second char is an alignment token, | 
|  | 120 | then parse the fill char */ | 
|  | 121 | if (end-ptr >= 2 && is_alignment_token(ptr[1])) { | 
|  | 122 | format->align = ptr[1]; | 
|  | 123 | format->fill_char = ptr[0]; | 
|  | 124 | ptr += 2; | 
| Eric Smith | 0cb431c | 2007-08-28 01:07:27 +0000 | [diff] [blame^] | 125 | } | 
|  | 126 | else if (end-ptr >= 1 && is_alignment_token(ptr[0])) { | 
| Eric Smith | 8c66326 | 2007-08-25 02:26:07 +0000 | [diff] [blame] | 127 | format->align = ptr[0]; | 
|  | 128 | ptr++; | 
|  | 129 | } | 
|  | 130 |  | 
|  | 131 | /* Parse the various sign options */ | 
|  | 132 | if (end-ptr >= 1 && is_sign_element(ptr[0])) { | 
|  | 133 | format->sign = ptr[0]; | 
|  | 134 | ptr++; | 
|  | 135 | if (end-ptr >= 1 && ptr[0] == ')') { | 
|  | 136 | ptr++; | 
|  | 137 | } | 
|  | 138 | } | 
|  | 139 |  | 
|  | 140 | /* The special case for 0-padding (backwards compat) */ | 
|  | 141 | if (format->fill_char == '\0' && | 
|  | 142 | end-ptr >= 1 && ptr[0] == '0') { | 
|  | 143 | format->fill_char = '0'; | 
|  | 144 | if (format->align == '\0') { | 
|  | 145 | format->align = '='; | 
|  | 146 | } | 
|  | 147 | ptr++; | 
|  | 148 | } | 
|  | 149 |  | 
|  | 150 | /* XXX add error checking */ | 
|  | 151 | specified_width = get_integer(&ptr, end, &format->width); | 
|  | 152 |  | 
|  | 153 | /* if specified_width is 0, we didn't consume any characters for | 
|  | 154 | the width. in that case, reset the width to -1, because | 
|  | 155 | get_integer() will have set it to zero */ | 
|  | 156 | if (specified_width == 0) { | 
|  | 157 | format->width = -1; | 
|  | 158 | } | 
|  | 159 |  | 
|  | 160 | /* Parse field precision */ | 
|  | 161 | if (end-ptr && ptr[0] == '.') { | 
|  | 162 | ptr++; | 
|  | 163 |  | 
|  | 164 | /* XXX add error checking */ | 
|  | 165 | specified_width = get_integer(&ptr, end, &format->precision); | 
|  | 166 |  | 
|  | 167 | /* not having a precision after a dot is an error */ | 
|  | 168 | if (specified_width == 0) { | 
|  | 169 | PyErr_Format(PyExc_ValueError, | 
|  | 170 | "Format specifier missing precision"); | 
|  | 171 | return 0; | 
|  | 172 | } | 
|  | 173 |  | 
|  | 174 | } | 
|  | 175 |  | 
|  | 176 | /* Finally, parse the type field */ | 
|  | 177 |  | 
|  | 178 | if (end-ptr > 1) { | 
|  | 179 | /* invalid conversion spec */ | 
|  | 180 | PyErr_Format(PyExc_ValueError, "Invalid conversion specification"); | 
|  | 181 | return 0; | 
|  | 182 | } | 
|  | 183 |  | 
|  | 184 | if (end-ptr == 1) { | 
|  | 185 | format->type = ptr[0]; | 
|  | 186 | ptr++; | 
|  | 187 | } | 
|  | 188 |  | 
|  | 189 | return 1; | 
|  | 190 | } | 
|  | 191 |  | 
|  | 192 |  | 
|  | 193 | /************************************************************************/ | 
|  | 194 | /*********** common routines for numeric formatting *********************/ | 
|  | 195 | /************************************************************************/ | 
|  | 196 |  | 
|  | 197 | /* describes the layout for an integer, see the comment in | 
|  | 198 | _calc_integer_widths() for details */ | 
|  | 199 | typedef struct { | 
|  | 200 | Py_ssize_t n_lpadding; | 
|  | 201 | Py_ssize_t n_spadding; | 
|  | 202 | Py_ssize_t n_rpadding; | 
|  | 203 | char lsign; | 
|  | 204 | Py_ssize_t n_lsign; | 
|  | 205 | char rsign; | 
|  | 206 | Py_ssize_t n_rsign; | 
|  | 207 | Py_ssize_t n_total; /* just a convenience, it's derivable from the | 
|  | 208 | other fields */ | 
|  | 209 | } NumberFieldWidths; | 
|  | 210 |  | 
|  | 211 | /* not all fields of format are used.  for example, precision is | 
|  | 212 | unused.  should this take discrete params in order to be more clear | 
|  | 213 | about what it does?  or is passing a single format parameter easier | 
|  | 214 | and more efficient enough to justify a little obfuscation? */ | 
|  | 215 | static void | 
|  | 216 | calc_number_widths(NumberFieldWidths *r, STRINGLIB_CHAR actual_sign, | 
|  | 217 | Py_ssize_t n_digits, const InternalFormatSpec *format) | 
|  | 218 | { | 
|  | 219 | r->n_lpadding = 0; | 
|  | 220 | r->n_spadding = 0; | 
|  | 221 | r->n_rpadding = 0; | 
|  | 222 | r->lsign = '\0'; | 
|  | 223 | r->n_lsign = 0; | 
|  | 224 | r->rsign = '\0'; | 
|  | 225 | r->n_rsign = 0; | 
|  | 226 |  | 
|  | 227 | /* the output will look like: | 
|  | 228 | |                                                           | | 
|  | 229 | | <lpadding> <lsign> <spadding> <digits> <rsign> <rpadding> | | 
|  | 230 | |                                                           | | 
|  | 231 |  | 
|  | 232 | lsign and rsign are computed from format->sign and the actual | 
|  | 233 | sign of the number | 
|  | 234 |  | 
|  | 235 | digits is already known | 
|  | 236 |  | 
|  | 237 | the total width is either given, or computed from the | 
|  | 238 | actual digits | 
|  | 239 |  | 
|  | 240 | only one of lpadding, spadding, and rpadding can be non-zero, | 
|  | 241 | and it's calculated from the width and other fields | 
|  | 242 | */ | 
|  | 243 |  | 
|  | 244 | /* compute the various parts we're going to write */ | 
|  | 245 | if (format->sign == '+') { | 
|  | 246 | /* always put a + or - */ | 
|  | 247 | r->n_lsign = 1; | 
|  | 248 | r->lsign = (actual_sign == '-' ? '-' : '+'); | 
| Eric Smith | 0cb431c | 2007-08-28 01:07:27 +0000 | [diff] [blame^] | 249 | } | 
|  | 250 | else if (format->sign == '(') { | 
| Eric Smith | 8c66326 | 2007-08-25 02:26:07 +0000 | [diff] [blame] | 251 | if (actual_sign == '-') { | 
|  | 252 | r->n_lsign = 1; | 
|  | 253 | r->lsign = '('; | 
|  | 254 | r->n_rsign = 1; | 
|  | 255 | r->rsign = ')'; | 
|  | 256 | } | 
| Eric Smith | 0cb431c | 2007-08-28 01:07:27 +0000 | [diff] [blame^] | 257 | } | 
|  | 258 | else if (format->sign == ' ') { | 
| Eric Smith | 8c66326 | 2007-08-25 02:26:07 +0000 | [diff] [blame] | 259 | r->n_lsign = 1; | 
|  | 260 | r->lsign = (actual_sign == '-' ? '-' : ' '); | 
| Eric Smith | 0cb431c | 2007-08-28 01:07:27 +0000 | [diff] [blame^] | 261 | } | 
|  | 262 | else { | 
| Eric Smith | 8c66326 | 2007-08-25 02:26:07 +0000 | [diff] [blame] | 263 | /* non specified, or the default (-) */ | 
|  | 264 | if (actual_sign == '-') { | 
|  | 265 | r->n_lsign = 1; | 
|  | 266 | r->lsign = '-'; | 
|  | 267 | } | 
|  | 268 | } | 
|  | 269 |  | 
|  | 270 | /* now the number of padding characters */ | 
|  | 271 | if (format->width == -1) { | 
|  | 272 | /* no padding at all, nothing to do */ | 
| Eric Smith | 0cb431c | 2007-08-28 01:07:27 +0000 | [diff] [blame^] | 273 | } | 
|  | 274 | else { | 
| Eric Smith | 8c66326 | 2007-08-25 02:26:07 +0000 | [diff] [blame] | 275 | /* see if any padding is needed */ | 
|  | 276 | if (r->n_lsign + n_digits + r->n_rsign >= format->width) { | 
|  | 277 | /* no padding needed, we're already bigger than the | 
|  | 278 | requested width */ | 
| Eric Smith | 0cb431c | 2007-08-28 01:07:27 +0000 | [diff] [blame^] | 279 | } | 
|  | 280 | else { | 
| Eric Smith | 8c66326 | 2007-08-25 02:26:07 +0000 | [diff] [blame] | 281 | /* determine which of left, space, or right padding is | 
|  | 282 | needed */ | 
|  | 283 | Py_ssize_t padding = format->width - (r->n_lsign + n_digits + r->n_rsign); | 
|  | 284 | if (format->align == '<') | 
|  | 285 | r->n_rpadding = padding; | 
|  | 286 | else if (format->align == '>') | 
|  | 287 | r->n_lpadding = padding; | 
|  | 288 | else if (format->align == '^') { | 
|  | 289 | r->n_lpadding = padding / 2; | 
|  | 290 | r->n_rpadding = padding - r->n_lpadding; | 
| Eric Smith | 0cb431c | 2007-08-28 01:07:27 +0000 | [diff] [blame^] | 291 | } | 
|  | 292 | else | 
| Eric Smith | 8c66326 | 2007-08-25 02:26:07 +0000 | [diff] [blame] | 293 | /* must be '=' */ | 
|  | 294 | r->n_spadding = padding; | 
|  | 295 | } | 
|  | 296 | } | 
|  | 297 | r->n_total = r->n_lpadding + r->n_lsign + r->n_spadding + | 
|  | 298 | n_digits + r->n_rsign + r->n_rpadding; | 
|  | 299 | } | 
|  | 300 |  | 
|  | 301 | /* fill in the non-digit parts of a numbers's string representation, | 
|  | 302 | as determined in _calc_integer_widths().  returns the pointer to | 
|  | 303 | where the digits go. */ | 
|  | 304 | static STRINGLIB_CHAR * | 
|  | 305 | fill_number(STRINGLIB_CHAR *p_buf, const NumberFieldWidths *spec, | 
|  | 306 | Py_ssize_t n_digits, STRINGLIB_CHAR fill_char) | 
|  | 307 | { | 
|  | 308 | STRINGLIB_CHAR* p_digits; | 
|  | 309 |  | 
|  | 310 | if (spec->n_lpadding) { | 
|  | 311 | STRINGLIB_FILL(p_buf, fill_char, spec->n_lpadding); | 
|  | 312 | p_buf += spec->n_lpadding; | 
|  | 313 | } | 
|  | 314 | if (spec->n_lsign == 1) { | 
|  | 315 | *p_buf++ = spec->lsign; | 
|  | 316 | } | 
|  | 317 | if (spec->n_spadding) { | 
|  | 318 | STRINGLIB_FILL(p_buf, fill_char, spec->n_spadding); | 
|  | 319 | p_buf += spec->n_spadding; | 
|  | 320 | } | 
|  | 321 | p_digits = p_buf; | 
|  | 322 | p_buf += n_digits; | 
|  | 323 | if (spec->n_rsign == 1) { | 
|  | 324 | *p_buf++ = spec->rsign; | 
|  | 325 | } | 
|  | 326 | if (spec->n_rpadding) { | 
|  | 327 | STRINGLIB_FILL(p_buf, fill_char, spec->n_rpadding); | 
|  | 328 | p_buf += spec->n_rpadding; | 
|  | 329 | } | 
|  | 330 | return p_digits; | 
|  | 331 | } | 
|  | 332 |  | 
|  | 333 | /************************************************************************/ | 
|  | 334 | /*********** string formatting ******************************************/ | 
|  | 335 | /************************************************************************/ | 
|  | 336 |  | 
|  | 337 | static PyObject * | 
|  | 338 | format_string_internal(PyObject *value, const InternalFormatSpec *format) | 
|  | 339 | { | 
|  | 340 | Py_ssize_t width; /* total field width */ | 
|  | 341 | Py_ssize_t lpad; | 
|  | 342 | STRINGLIB_CHAR *dst; | 
|  | 343 | STRINGLIB_CHAR *src = STRINGLIB_STR(value); | 
|  | 344 | Py_ssize_t len = STRINGLIB_LEN(value); | 
|  | 345 | PyObject *result = NULL; | 
|  | 346 |  | 
|  | 347 | /* sign is not allowed on strings */ | 
|  | 348 | if (format->sign != '\0') { | 
|  | 349 | PyErr_SetString(PyExc_ValueError, | 
|  | 350 | "Sign not allowed in string format specifier"); | 
|  | 351 | goto done; | 
|  | 352 | } | 
|  | 353 |  | 
|  | 354 | /* '=' alignment not allowed on strings */ | 
|  | 355 | if (format->align == '=') { | 
|  | 356 | PyErr_SetString(PyExc_ValueError, | 
|  | 357 | "'=' alignment not allowed " | 
|  | 358 | "in string format specifier"); | 
|  | 359 | goto done; | 
|  | 360 | } | 
|  | 361 |  | 
|  | 362 | /* if precision is specified, output no more that format.precision | 
|  | 363 | characters */ | 
|  | 364 | if (format->precision >= 0 && len >= format->precision) { | 
|  | 365 | len = format->precision; | 
|  | 366 | } | 
|  | 367 |  | 
|  | 368 | if (format->width >= 0) { | 
|  | 369 | width = format->width; | 
|  | 370 |  | 
|  | 371 | /* but use at least len characters */ | 
|  | 372 | if (len > width) { | 
|  | 373 | width = len; | 
|  | 374 | } | 
| Eric Smith | 0cb431c | 2007-08-28 01:07:27 +0000 | [diff] [blame^] | 375 | } | 
|  | 376 | else { | 
| Eric Smith | 8c66326 | 2007-08-25 02:26:07 +0000 | [diff] [blame] | 377 | /* not specified, use all of the chars and no more */ | 
|  | 378 | width = len; | 
|  | 379 | } | 
|  | 380 |  | 
|  | 381 | /* allocate the resulting string */ | 
|  | 382 | result = STRINGLIB_NEW(NULL, width); | 
|  | 383 | if (result == NULL) | 
|  | 384 | goto done; | 
|  | 385 |  | 
|  | 386 | /* now write into that space */ | 
|  | 387 | dst = STRINGLIB_STR(result); | 
|  | 388 |  | 
|  | 389 | /* figure out how much leading space we need, based on the | 
|  | 390 | aligning */ | 
|  | 391 | if (format->align == '>') | 
|  | 392 | lpad = width - len; | 
|  | 393 | else if (format->align == '^') | 
|  | 394 | lpad = (width - len) / 2; | 
|  | 395 | else | 
|  | 396 | lpad = 0; | 
|  | 397 |  | 
|  | 398 | /* if right aligning, increment the destination allow space on the | 
|  | 399 | left */ | 
|  | 400 | memcpy(dst + lpad, src, len * sizeof(STRINGLIB_CHAR)); | 
|  | 401 |  | 
|  | 402 | /* do any padding */ | 
|  | 403 | if (width > len) { | 
|  | 404 | STRINGLIB_CHAR fill_char = format->fill_char; | 
|  | 405 | if (fill_char == '\0') { | 
|  | 406 | /* use the default, if not specified */ | 
|  | 407 | fill_char = ' '; | 
|  | 408 | } | 
|  | 409 |  | 
|  | 410 | /* pad on left */ | 
|  | 411 | if (lpad) | 
|  | 412 | STRINGLIB_FILL(dst, fill_char, lpad); | 
|  | 413 |  | 
|  | 414 | /* pad on right */ | 
|  | 415 | if (width - len - lpad) | 
|  | 416 | STRINGLIB_FILL(dst + len + lpad, fill_char, width - len - lpad); | 
|  | 417 | } | 
|  | 418 |  | 
|  | 419 | done: | 
|  | 420 | return result; | 
|  | 421 | } | 
|  | 422 |  | 
|  | 423 |  | 
|  | 424 | /************************************************************************/ | 
|  | 425 | /*********** long formatting ********************************************/ | 
|  | 426 | /************************************************************************/ | 
|  | 427 |  | 
|  | 428 | static PyObject * | 
|  | 429 | format_long_internal(PyObject *value, const InternalFormatSpec *format) | 
|  | 430 | { | 
|  | 431 | PyObject *result = NULL; | 
|  | 432 | int total_leading_chars_to_skip = 0; /* also includes sign, if | 
|  | 433 | present */ | 
|  | 434 | STRINGLIB_CHAR sign = '\0'; | 
|  | 435 | STRINGLIB_CHAR *p; | 
|  | 436 | Py_ssize_t n_digits;       /* count of digits need from the computed | 
|  | 437 | string */ | 
|  | 438 | Py_ssize_t len; | 
|  | 439 | Py_ssize_t tmp; | 
|  | 440 | NumberFieldWidths spec; | 
|  | 441 | long x; | 
|  | 442 |  | 
|  | 443 | /* no precision allowed on integers */ | 
|  | 444 | if (format->precision != -1) { | 
|  | 445 | PyErr_SetString(PyExc_ValueError, | 
|  | 446 | "Precision not allowed in integer format specifier"); | 
|  | 447 | goto done; | 
|  | 448 | } | 
|  | 449 |  | 
|  | 450 |  | 
|  | 451 | /* special case for character formatting */ | 
|  | 452 | if (format->type == 'c') { | 
|  | 453 | /* error to specify a sign */ | 
|  | 454 | if (format->sign != '\0') { | 
|  | 455 | PyErr_SetString(PyExc_ValueError, | 
|  | 456 | "Sign not allowed with integer" | 
|  | 457 | " format specifier 'c'"); | 
|  | 458 | goto done; | 
|  | 459 | } | 
|  | 460 |  | 
|  | 461 | /* taken from unicodeobject.c formatchar() */ | 
|  | 462 | /* Integer input truncated to a character */ | 
|  | 463 | x = PyInt_AsLong(value); | 
|  | 464 | if (x == -1 && PyErr_Occurred()) | 
|  | 465 | goto done; | 
|  | 466 | #ifdef Py_UNICODE_WIDE | 
|  | 467 | if (x < 0 || x > 0x10ffff) { | 
|  | 468 | PyErr_SetString(PyExc_OverflowError, | 
|  | 469 | "%c arg not in range(0x110000) " | 
|  | 470 | "(wide Python build)"); | 
|  | 471 | goto done; | 
|  | 472 | } | 
|  | 473 | #else | 
|  | 474 | if (x < 0 || x > 0xffff) { | 
|  | 475 | PyErr_SetString(PyExc_OverflowError, | 
|  | 476 | "%c arg not in range(0x10000) " | 
|  | 477 | "(narrow Python build)"); | 
|  | 478 | goto done; | 
|  | 479 | } | 
|  | 480 | #endif | 
|  | 481 | result = STRINGLIB_NEW(NULL, 1); | 
|  | 482 | if (result == NULL) | 
|  | 483 | goto done; | 
|  | 484 | p = STRINGLIB_STR(result); | 
|  | 485 | p[0] = (Py_UNICODE) x; | 
|  | 486 | n_digits = len = 1; | 
| Eric Smith | 0cb431c | 2007-08-28 01:07:27 +0000 | [diff] [blame^] | 487 | } | 
|  | 488 | else { | 
| Eric Smith | 8c66326 | 2007-08-25 02:26:07 +0000 | [diff] [blame] | 489 | int base; | 
|  | 490 | int format_leading_chars_to_skip;  /* characters added by | 
|  | 491 | PyNumber_ToBase that we | 
|  | 492 | want to skip over. | 
|  | 493 | instead of using them, | 
|  | 494 | we'll compute our | 
|  | 495 | own. */ | 
|  | 496 | /* compute the base and how many characters will be added by | 
|  | 497 | PyNumber_ToBase */ | 
|  | 498 | switch (format->type) { | 
|  | 499 | case 'b': | 
|  | 500 | base = 2; | 
|  | 501 | format_leading_chars_to_skip = 2; /* 0b */ | 
|  | 502 | break; | 
|  | 503 | case 'o': | 
|  | 504 | base = 8; | 
|  | 505 | format_leading_chars_to_skip = 2; /* 0o */ | 
|  | 506 | break; | 
|  | 507 | case 'x': | 
|  | 508 | case 'X': | 
|  | 509 | base = 16; | 
|  | 510 | format_leading_chars_to_skip = 2; /* 0x */ | 
|  | 511 | break; | 
|  | 512 | default:  /* shouldn't be needed, but stops a compiler warning */ | 
|  | 513 | case 'd': | 
|  | 514 | base = 10; | 
|  | 515 | format_leading_chars_to_skip = 0; | 
|  | 516 | break; | 
|  | 517 | } | 
|  | 518 |  | 
|  | 519 | /* do the hard part, converting to a string in a given base */ | 
|  | 520 | result = PyNumber_ToBase(value, base); | 
|  | 521 | if (result == NULL) | 
|  | 522 | goto done; | 
|  | 523 |  | 
|  | 524 | n_digits = STRINGLIB_LEN(result); | 
|  | 525 | len = n_digits; | 
|  | 526 | p = STRINGLIB_STR(result); | 
|  | 527 |  | 
|  | 528 | /* if X, convert to uppercase */ | 
|  | 529 | if (format->type == 'X') | 
|  | 530 | for (tmp = 0; tmp < len; tmp++) | 
|  | 531 | p[tmp] = STRINGLIB_TOUPPER(p[tmp]); | 
|  | 532 |  | 
|  | 533 | /* is a sign character present in the output?  if so, remember it | 
|  | 534 | and skip it */ | 
|  | 535 | sign = p[0]; | 
|  | 536 | if (sign == '-') { | 
|  | 537 | total_leading_chars_to_skip += 1; | 
|  | 538 | n_digits--; | 
|  | 539 | } | 
|  | 540 |  | 
|  | 541 | /* skip over the leading digits (0x, 0b, etc.) */ | 
|  | 542 | assert(n_digits >= format_leading_chars_to_skip + 1); | 
|  | 543 | n_digits -= format_leading_chars_to_skip; | 
|  | 544 | total_leading_chars_to_skip += format_leading_chars_to_skip; | 
|  | 545 | } | 
|  | 546 |  | 
|  | 547 | calc_number_widths(&spec, sign, n_digits, format); | 
|  | 548 |  | 
|  | 549 | /* if the buffer is getting bigger, realloc it.  if it's getting | 
|  | 550 | smaller, don't realloc because we need to move the results | 
|  | 551 | around first.  realloc after we've done that */ | 
|  | 552 |  | 
|  | 553 | if (spec.n_total > len) { | 
|  | 554 | if (STRINGLIB_RESIZE(&result, spec.n_total) < 0) | 
|  | 555 | goto done; | 
|  | 556 | /* recalc, because string might have moved */ | 
|  | 557 | p = STRINGLIB_STR(result); | 
|  | 558 | } | 
|  | 559 |  | 
|  | 560 | /* copy the characters into position first, since we're going to | 
|  | 561 | overwrite some of that space */ | 
|  | 562 | /* we need to move if the number of left padding in the output is | 
|  | 563 | different from the number of characters we need to skip */ | 
|  | 564 | if ((spec.n_lpadding + spec.n_lsign + spec.n_spadding) != | 
|  | 565 | total_leading_chars_to_skip) { | 
|  | 566 | memmove(p + (spec.n_lpadding + spec.n_lsign + spec.n_spadding), | 
|  | 567 | p + total_leading_chars_to_skip, | 
|  | 568 | n_digits * sizeof(STRINGLIB_CHAR)); | 
|  | 569 | } | 
|  | 570 |  | 
|  | 571 | /* now fill in the non-digit parts */ | 
|  | 572 | fill_number(p, &spec, n_digits, | 
|  | 573 | format->fill_char == '\0' ? ' ' : format->fill_char); | 
|  | 574 |  | 
|  | 575 | /* if we're getting smaller, realloc now */ | 
|  | 576 | if (spec.n_total < len) { | 
|  | 577 | if (STRINGLIB_RESIZE(&result, spec.n_total) < 0) | 
|  | 578 | goto done; | 
|  | 579 | } | 
|  | 580 |  | 
|  | 581 | done: | 
|  | 582 | return result; | 
|  | 583 | } | 
|  | 584 |  | 
|  | 585 |  | 
|  | 586 | /************************************************************************/ | 
|  | 587 | /*********** float formatting *******************************************/ | 
|  | 588 | /************************************************************************/ | 
|  | 589 |  | 
|  | 590 | /* taken from unicodeobject.c */ | 
|  | 591 | static Py_ssize_t | 
|  | 592 | strtounicode(Py_UNICODE *buffer, const char *charbuffer) | 
|  | 593 | { | 
|  | 594 | register Py_ssize_t i; | 
|  | 595 | Py_ssize_t len = strlen(charbuffer); | 
|  | 596 | for (i = len - 1; i >= 0; i--) | 
|  | 597 | buffer[i] = (Py_UNICODE) charbuffer[i]; | 
|  | 598 |  | 
|  | 599 | return len; | 
|  | 600 | } | 
|  | 601 |  | 
/* signature of the callback that does the actual conversion of a
   double to text.  it has the same shape as PyOS_ascii_formatd:
   (output buffer, size of that buffer, printf-style format, value) */
typedef char *(*DoubleSnprintfFunction)(char *buffer, size_t buf_len,
                                        const char *format, double d);
|  | 607 |  | 
/* just a wrapper to make PyOS_snprintf look like DoubleSnprintfFunction.

   formats d into buffer (at most buf_len bytes) using format.  returns
   buffer, or NULL if PyOS_snprintf reports an error, so that the
   return value means the same thing as PyOS_ascii_formatd's. */
static char*
snprintf_double(char *buffer, size_t buf_len, const char *format, double d)
{
    if (PyOS_snprintf(buffer, buf_len, format, d) < 0)
        return NULL;
    return buffer;
}
|  | 615 |  | 
|  | 616 | /* see FORMATBUFLEN in unicodeobject.c */ | 
|  | 617 | #define FLOAT_FORMATBUFLEN 120 | 
|  | 618 |  | 
|  | 619 | /* much of this is taken from unicodeobject.c */ | 
|  | 620 | /* use type instead of format->type, so that it can be overridden by | 
|  | 621 | format_number() */ | 
|  | 622 | static PyObject * | 
|  | 623 | _format_float(STRINGLIB_CHAR type, PyObject *value, | 
|  | 624 | const InternalFormatSpec *format, | 
|  | 625 | DoubleSnprintfFunction snprintf) | 
|  | 626 | { | 
|  | 627 | /* fmt = '%.' + `prec` + `type` + '%%' | 
|  | 628 | worst case length = 2 + 10 (len of INT_MAX) + 1 + 2 = 15 (use 20)*/ | 
|  | 629 | char fmt[20]; | 
|  | 630 |  | 
|  | 631 | /* taken from unicodeobject.c */ | 
|  | 632 | /* Worst case length calc to ensure no buffer overrun: | 
|  | 633 |  | 
|  | 634 | 'g' formats: | 
|  | 635 | fmt = %#.<prec>g | 
|  | 636 | buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp | 
|  | 637 | for any double rep.) | 
|  | 638 | len = 1 + prec + 1 + 2 + 5 = 9 + prec | 
|  | 639 |  | 
|  | 640 | 'f' formats: | 
|  | 641 | buf = '-' + [0-9]*x + '.' + [0-9]*prec (with x < 50) | 
|  | 642 | len = 1 + 50 + 1 + prec = 52 + prec | 
|  | 643 |  | 
|  | 644 | If prec=0 the effective precision is 1 (the leading digit is | 
|  | 645 | always given), therefore increase the length by one. | 
|  | 646 |  | 
|  | 647 | */ | 
|  | 648 | char charbuf[FLOAT_FORMATBUFLEN]; | 
|  | 649 | Py_ssize_t n_digits; | 
|  | 650 | double x; | 
|  | 651 | Py_ssize_t precision = format->precision; | 
|  | 652 | PyObject *result = NULL; | 
|  | 653 | STRINGLIB_CHAR sign; | 
|  | 654 | char* trailing = ""; | 
|  | 655 | STRINGLIB_CHAR *p; | 
|  | 656 | NumberFieldWidths spec; | 
|  | 657 |  | 
|  | 658 | #if STRINGLIB_IS_UNICODE | 
|  | 659 | Py_UNICODE unicodebuf[FLOAT_FORMATBUFLEN]; | 
|  | 660 | #endif | 
|  | 661 |  | 
|  | 662 | /* first, do the conversion as 8-bit chars, using the platform's | 
|  | 663 | snprintf.  then, if needed, convert to unicode. */ | 
|  | 664 |  | 
|  | 665 | /* 'F' is the same as 'f', per the PEP */ | 
|  | 666 | if (type == 'F') | 
|  | 667 | type = 'f'; | 
|  | 668 |  | 
|  | 669 | x = PyFloat_AsDouble(value); | 
|  | 670 |  | 
|  | 671 | if (x == -1.0 && PyErr_Occurred()) | 
|  | 672 | goto done; | 
|  | 673 |  | 
|  | 674 | if (type == '%') { | 
|  | 675 | type = 'f'; | 
|  | 676 | x *= 100; | 
|  | 677 | trailing = "%"; | 
|  | 678 | } | 
|  | 679 |  | 
|  | 680 | if (precision < 0) | 
|  | 681 | precision = 6; | 
|  | 682 | if (type == 'f' && (fabs(x) / 1e25) >= 1e25) | 
|  | 683 | type = 'g'; | 
|  | 684 |  | 
|  | 685 | /* cast "type", because if we're in unicode we need to pass a | 
|  | 686 | 8-bit char.  this is safe, because we've restricted what "type" | 
|  | 687 | can be */ | 
|  | 688 | PyOS_snprintf(fmt, sizeof(fmt), "%%.%zd%c", precision, (char)type); | 
|  | 689 |  | 
|  | 690 | /* call the passed in function to do the actual formatting */ | 
|  | 691 | snprintf(charbuf, sizeof(charbuf), fmt, x); | 
|  | 692 |  | 
|  | 693 | /* adding trailing to fmt with PyOS_snprintf doesn't work, not | 
|  | 694 | sure why.  we'll just concatentate it here, no harm done.  we | 
|  | 695 | know we can't have a buffer overflow from the fmt size | 
|  | 696 | analysis */ | 
|  | 697 | strcat(charbuf, trailing); | 
|  | 698 |  | 
|  | 699 | /* rather than duplicate the code for snprintf for both unicode | 
|  | 700 | and 8 bit strings, we just use the 8 bit version and then | 
|  | 701 | convert to unicode in a separate code path.  that's probably | 
|  | 702 | the lesser of 2 evils. */ | 
|  | 703 | #if STRINGLIB_IS_UNICODE | 
|  | 704 | n_digits = strtounicode(unicodebuf, charbuf); | 
|  | 705 | p = unicodebuf; | 
|  | 706 | #else | 
|  | 707 | /* compute the length.  I believe this is done because the return | 
|  | 708 | value from snprintf above is unreliable */ | 
|  | 709 | n_digits = strlen(charbuf); | 
|  | 710 | p = charbuf; | 
|  | 711 | #endif | 
|  | 712 |  | 
|  | 713 | /* is a sign character present in the output?  if so, remember it | 
|  | 714 | and skip it */ | 
|  | 715 | sign = p[0]; | 
|  | 716 | if (sign == '-') { | 
|  | 717 | p++; | 
|  | 718 | n_digits--; | 
|  | 719 | } | 
|  | 720 |  | 
|  | 721 | calc_number_widths(&spec, sign, n_digits, format); | 
|  | 722 |  | 
|  | 723 | /* allocate a string with enough space */ | 
|  | 724 | result = STRINGLIB_NEW(NULL, spec.n_total); | 
|  | 725 | if (result == NULL) | 
|  | 726 | goto done; | 
|  | 727 |  | 
|  | 728 | /* fill in the non-digit parts */ | 
|  | 729 | fill_number(STRINGLIB_STR(result), &spec, n_digits, | 
|  | 730 | format->fill_char == '\0' ? ' ' : format->fill_char); | 
|  | 731 |  | 
|  | 732 | /* fill in the digit parts */ | 
|  | 733 | memmove(STRINGLIB_STR(result) + (spec.n_lpadding + spec.n_lsign + spec.n_spadding), | 
|  | 734 | p, | 
|  | 735 | n_digits * sizeof(STRINGLIB_CHAR)); | 
|  | 736 |  | 
|  | 737 | done: | 
|  | 738 | return result; | 
|  | 739 | } | 
|  | 740 |  | 
|  | 741 | static PyObject * | 
|  | 742 | format_float_internal(PyObject *value, const InternalFormatSpec *format) | 
|  | 743 | { | 
|  | 744 | if (format->type == 'n') | 
|  | 745 | return _format_float('f', value, format, snprintf_double); | 
|  | 746 | else | 
|  | 747 | return _format_float(format->type, value, format, PyOS_ascii_formatd); | 
|  | 748 | } | 
|  | 749 |  | 
|  | 750 | /************************************************************************/ | 
|  | 751 | /*********** built in formatters ****************************************/ | 
|  | 752 | /************************************************************************/ | 
|  | 753 |  | 
|  | 754 | PyObject * | 
|  | 755 | FORMAT_STRING(PyObject* value, PyObject* args) | 
|  | 756 | { | 
|  | 757 | PyObject *format_spec; | 
|  | 758 | PyObject *tmp = NULL; | 
|  | 759 | PyObject *result = NULL; | 
|  | 760 | InternalFormatSpec format; | 
|  | 761 |  | 
|  | 762 | if (!PyArg_ParseTuple(args, "O:__format__", &format_spec)) | 
|  | 763 | goto done; | 
|  | 764 | if (!STRINGLIB_CHECK(format_spec)) { | 
|  | 765 | PyErr_SetString(PyExc_TypeError, STRINGLIB_TYPE_NAME " object required"); | 
|  | 766 | goto done; | 
|  | 767 | } | 
|  | 768 |  | 
|  | 769 | /* check for the special case of zero length format spec, make | 
|  | 770 | it equivalent to str(value) */ | 
|  | 771 | if (STRINGLIB_LEN(format_spec) == 0) { | 
|  | 772 | result = STRINGLIB_TOSTR(value); | 
|  | 773 | goto done; | 
|  | 774 | } | 
|  | 775 |  | 
|  | 776 | /* parse the format_spec */ | 
|  | 777 | if (!parse_internal_render_format_spec(format_spec, &format, 's')) | 
|  | 778 | goto done; | 
|  | 779 |  | 
|  | 780 | /* type conversion? */ | 
|  | 781 | switch (format.type) { | 
|  | 782 | case 's': | 
|  | 783 | /* no type conversion needed, already a string.  do the formatting */ | 
|  | 784 | result = format_string_internal(value, &format); | 
|  | 785 | break; | 
|  | 786 | #if 0 | 
|  | 787 | case 'b': | 
|  | 788 | case 'c': | 
|  | 789 | case 'd': | 
|  | 790 | case 'o': | 
|  | 791 | case 'x': | 
|  | 792 | case 'X': | 
|  | 793 | /* convert to integer */ | 
|  | 794 | /* XXX: make a stringlib function to do this when backporting, | 
|  | 795 | since FromUnicode differs from FromString */ | 
|  | 796 | tmp = PyLong_FromUnicode(STRINGLIB_STR(value), STRINGLIB_LEN(value), 0); | 
|  | 797 | if (tmp == NULL) | 
|  | 798 | goto done; | 
|  | 799 | result = format_long_internal(tmp, &format); | 
|  | 800 | break; | 
|  | 801 |  | 
|  | 802 | case 'e': | 
|  | 803 | case 'E': | 
|  | 804 | case 'f': | 
|  | 805 | case 'F': | 
|  | 806 | case 'g': | 
|  | 807 | case 'G': | 
|  | 808 | case 'n': | 
|  | 809 | case '%': | 
|  | 810 | /* convert to float */ | 
|  | 811 | tmp = PyFloat_FromString(value); | 
|  | 812 | if (tmp == NULL) | 
|  | 813 | goto done; | 
|  | 814 | result = format_float_internal(tmp, &format); | 
|  | 815 | break; | 
|  | 816 | #endif | 
|  | 817 | default: | 
|  | 818 | /* unknown */ | 
|  | 819 | PyErr_Format(PyExc_ValueError, "Unknown conversion type %c", | 
|  | 820 | format.type); | 
|  | 821 | goto done; | 
|  | 822 | } | 
|  | 823 |  | 
|  | 824 | done: | 
|  | 825 | Py_XDECREF(tmp); | 
|  | 826 | return result; | 
|  | 827 | } | 
|  | 828 |  | 
|  | 829 | PyObject * | 
|  | 830 | FORMAT_LONG(PyObject* value, PyObject* args) | 
|  | 831 | { | 
|  | 832 | PyObject *format_spec; | 
|  | 833 | PyObject *result = NULL; | 
|  | 834 | PyObject *tmp = NULL; | 
|  | 835 | InternalFormatSpec format; | 
|  | 836 |  | 
|  | 837 | if (!PyArg_ParseTuple(args, "O:__format__", &format_spec)) | 
|  | 838 | goto done; | 
|  | 839 | if (!STRINGLIB_CHECK(format_spec)) { | 
|  | 840 | PyErr_SetString(PyExc_TypeError, STRINGLIB_TYPE_NAME " object required"); | 
|  | 841 | goto done; | 
|  | 842 | } | 
|  | 843 |  | 
|  | 844 | /* check for the special case of zero length format spec, make | 
|  | 845 | it equivalent to str(value) */ | 
|  | 846 | if (STRINGLIB_LEN(format_spec) == 0) { | 
|  | 847 | result = STRINGLIB_TOSTR(value); | 
|  | 848 | goto done; | 
|  | 849 | } | 
|  | 850 |  | 
|  | 851 | /* parse the format_spec */ | 
|  | 852 | if (!parse_internal_render_format_spec(format_spec, &format, 'd')) | 
|  | 853 | goto done; | 
|  | 854 |  | 
|  | 855 | /* type conversion? */ | 
|  | 856 | switch (format.type) { | 
|  | 857 | #if 0 | 
|  | 858 | case 's': | 
|  | 859 | /* convert to string/unicode */ | 
|  | 860 | tmp = STRINGLIB_TOSTR(value); | 
|  | 861 | if (tmp == NULL) | 
|  | 862 | goto done; | 
|  | 863 | result = format_string_internal(tmp, &format); | 
|  | 864 | break; | 
|  | 865 | #endif | 
|  | 866 | case 'b': | 
|  | 867 | case 'c': | 
|  | 868 | case 'd': | 
|  | 869 | case 'o': | 
|  | 870 | case 'x': | 
|  | 871 | case 'X': | 
|  | 872 | /* no type conversion needed, already an int.  do the formatting */ | 
|  | 873 | result = format_long_internal(value, &format); | 
|  | 874 | break; | 
|  | 875 |  | 
|  | 876 | case 'e': | 
|  | 877 | case 'E': | 
|  | 878 | case 'f': | 
|  | 879 | case 'F': | 
|  | 880 | case 'g': | 
|  | 881 | case 'G': | 
|  | 882 | case 'n': | 
|  | 883 | case '%': | 
|  | 884 | /* convert to float */ | 
|  | 885 | tmp = PyNumber_Float(value); | 
|  | 886 | if (tmp == NULL) | 
|  | 887 | goto done; | 
|  | 888 | result = format_float_internal(value, &format); | 
|  | 889 | break; | 
|  | 890 |  | 
|  | 891 | default: | 
|  | 892 | /* unknown */ | 
|  | 893 | PyErr_Format(PyExc_ValueError, "Unknown conversion type %c", | 
|  | 894 | format.type); | 
|  | 895 | goto done; | 
|  | 896 | } | 
|  | 897 |  | 
|  | 898 | done: | 
|  | 899 | Py_XDECREF(tmp); | 
|  | 900 | return result; | 
|  | 901 | } | 
|  | 902 |  | 
|  | 903 | PyObject * | 
|  | 904 | FORMAT_FLOAT(PyObject *value, PyObject *args) | 
|  | 905 | { | 
|  | 906 | PyObject *format_spec; | 
|  | 907 | PyObject *result = NULL; | 
|  | 908 | PyObject *tmp = NULL; | 
|  | 909 | InternalFormatSpec format; | 
|  | 910 |  | 
|  | 911 | if (!PyArg_ParseTuple(args, "O:__format__", &format_spec)) | 
|  | 912 | goto done; | 
|  | 913 | if (!STRINGLIB_CHECK(format_spec)) { | 
|  | 914 | PyErr_SetString(PyExc_TypeError, STRINGLIB_TYPE_NAME " object required"); | 
|  | 915 | goto done; | 
|  | 916 | } | 
|  | 917 |  | 
|  | 918 | /* check for the special case of zero length format spec, make | 
|  | 919 | it equivalent to str(value) */ | 
|  | 920 | if (STRINGLIB_LEN(format_spec) == 0) { | 
|  | 921 | result = STRINGLIB_TOSTR(value); | 
|  | 922 | goto done; | 
|  | 923 | } | 
|  | 924 |  | 
|  | 925 | /* parse the format_spec */ | 
|  | 926 | if (!parse_internal_render_format_spec(format_spec, &format, 'g')) | 
|  | 927 | goto done; | 
|  | 928 |  | 
|  | 929 | /* type conversion? */ | 
|  | 930 | switch (format.type) { | 
|  | 931 | #if 0 | 
|  | 932 | case 's': | 
|  | 933 | /* convert to string/unicode */ | 
|  | 934 | tmp = STRINGLIB_TOSTR(value); | 
|  | 935 | if (tmp == NULL) | 
|  | 936 | goto done; | 
|  | 937 | result = format_string_internal(tmp, &format); | 
|  | 938 | break; | 
|  | 939 | #endif | 
|  | 940 | case 'b': | 
|  | 941 | case 'c': | 
|  | 942 | case 'd': | 
|  | 943 | case 'o': | 
|  | 944 | case 'x': | 
|  | 945 | case 'X': | 
|  | 946 | /* convert to integer */ | 
|  | 947 | tmp = PyNumber_Long(value); | 
|  | 948 | if (tmp == NULL) | 
|  | 949 | goto done; | 
|  | 950 | result = format_long_internal(tmp, &format); | 
|  | 951 | break; | 
|  | 952 |  | 
|  | 953 | case 'e': | 
|  | 954 | case 'E': | 
|  | 955 | case 'f': | 
|  | 956 | case 'F': | 
|  | 957 | case 'g': | 
|  | 958 | case 'G': | 
|  | 959 | case 'n': | 
|  | 960 | case '%': | 
|  | 961 | /* no conversion, already a float.  do the formatting */ | 
|  | 962 | result = format_float_internal(value, &format); | 
|  | 963 | break; | 
|  | 964 |  | 
|  | 965 | default: | 
|  | 966 | /* unknown */ | 
|  | 967 | PyErr_Format(PyExc_ValueError, "Unknown conversion type %c", | 
|  | 968 | format.type); | 
|  | 969 | goto done; | 
|  | 970 | } | 
|  | 971 |  | 
|  | 972 | done: | 
|  | 973 | Py_XDECREF(tmp); | 
|  | 974 | return result; | 
|  | 975 | } |