blob: db8c27c9e48a7982c1a90ceb50ccad7623beac11 [file] [log] [blame]
/* implements the unicode (as opposed to string) version of the
   built-in formatters for string, int, float.  that is, the versions
   of int.__format__, etc., that take and return unicode objects */
4
5#include "Python.h"
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02006#include <locale.h>
7
8/* Raises an exception about an unknown presentation type for this
9 * type. */
10
11static void
12unknown_presentation_type(Py_UCS4 presentation_type,
13 const char* type_name)
14{
15 /* %c might be out-of-range, hence the two cases. */
16 if (presentation_type > 32 && presentation_type < 128)
17 PyErr_Format(PyExc_ValueError,
18 "Unknown format code '%c' "
19 "for object of type '%.200s'",
20 (char)presentation_type,
21 type_name);
22 else
23 PyErr_Format(PyExc_ValueError,
24 "Unknown format code '\\x%x' "
25 "for object of type '%.200s'",
26 (unsigned int)presentation_type,
27 type_name);
28}
29
30static void
31invalid_comma_type(Py_UCS4 presentation_type)
32{
33 if (presentation_type > 32 && presentation_type < 128)
34 PyErr_Format(PyExc_ValueError,
35 "Cannot specify ',' with '%c'.",
36 (char)presentation_type);
37 else
38 PyErr_Format(PyExc_ValueError,
39 "Cannot specify ',' with '\\x%x'.",
40 (unsigned int)presentation_type);
41}
42
43/*
44 get_integer consumes 0 or more decimal digit characters from an
45 input string, updates *result with the corresponding positive
46 integer, and returns the number of digits consumed.
47
48 returns -1 on error.
49*/
50static int
51get_integer(PyObject *str, Py_ssize_t *pos, Py_ssize_t end,
52 Py_ssize_t *result)
53{
Mark Dickinson47862d42011-12-01 15:27:04 +000054 Py_ssize_t accumulator, digitval;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020055 int numdigits;
56 accumulator = numdigits = 0;
57 for (;;(*pos)++, numdigits++) {
58 if (*pos >= end)
59 break;
60 digitval = Py_UNICODE_TODECIMAL(PyUnicode_READ_CHAR(str, *pos));
61 if (digitval < 0)
62 break;
63 /*
Mark Dickinson47862d42011-12-01 15:27:04 +000064 Detect possible overflow before it happens:
65
66 accumulator * 10 + digitval > PY_SSIZE_T_MAX if and only if
67 accumulator > (PY_SSIZE_T_MAX - digitval) / 10.
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020068 */
Mark Dickinson47862d42011-12-01 15:27:04 +000069 if (accumulator > (PY_SSIZE_T_MAX - digitval) / 10) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020070 PyErr_Format(PyExc_ValueError,
71 "Too many decimal digits in format string");
72 return -1;
73 }
Mark Dickinson47862d42011-12-01 15:27:04 +000074 accumulator = accumulator * 10 + digitval;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020075 }
76 *result = accumulator;
77 return numdigits;
78}
79
80/************************************************************************/
81/*********** standard format specifier parsing **************************/
82/************************************************************************/
83
84/* returns true if this character is a specifier alignment token */
85Py_LOCAL_INLINE(int)
86is_alignment_token(Py_UCS4 c)
87{
88 switch (c) {
89 case '<': case '>': case '=': case '^':
90 return 1;
91 default:
92 return 0;
93 }
94}
95
96/* returns true if this character is a sign element */
97Py_LOCAL_INLINE(int)
98is_sign_element(Py_UCS4 c)
99{
100 switch (c) {
101 case ' ': case '+': case '-':
102 return 1;
103 default:
104 return 0;
105 }
106}
Eric Smith8c663262007-08-25 02:26:07 +0000107
Eric Smith4a7d76d2008-05-30 18:10:19 +0000108
/* Parsed form of a standard format specifier, as filled in by
   parse_internal_render_format_spec(). */
typedef struct {
    Py_UCS4 fill_char;          /* fill character; '\0' when none was given */
    Py_UCS4 align;              /* '<', '>', '=' or '^' (or the caller's
                                   default alignment) */
    int alternate;              /* 1 if '#' was given, else 0 */
    Py_UCS4 sign;               /* ' ', '+' or '-'; '\0' when none was given */
    Py_ssize_t width;           /* minimum field width; -1 when not given */
    int thousands_separators;   /* 1 if ',' was given, else 0 */
    Py_ssize_t precision;       /* precision; -1 when not given */
    Py_UCS4 type;               /* presentation type character (or the
                                   caller's default type) */
} InternalFormatSpec;
Eric Smith4a7d76d2008-05-30 18:10:19 +0000119
#if 0
/* Occasionally useful for debugging.  Should normally be compiled out
   (this whole block is disabled by the surrounding #if 0).
   Prints every field of *format to stdout, one per line. */
static void
DEBUG_PRINT_FORMAT_SPEC(InternalFormatSpec *format)
{
    printf("internal format spec: fill_char %d\n", format->fill_char);
    printf("internal format spec: align %d\n", format->align);
    printf("internal format spec: alternate %d\n", format->alternate);
    printf("internal format spec: sign %d\n", format->sign);
    printf("internal format spec: width %zd\n", format->width);
    printf("internal format spec: thousands_separators %d\n",
           format->thousands_separators);
    printf("internal format spec: precision %zd\n", format->precision);
    printf("internal format spec: type %c\n", format->type);
    printf("\n");
}
#endif
137
138
139/*
140 ptr points to the start of the format_spec, end points just past its end.
141 fills in format with the parsed information.
142 returns 1 on success, 0 on failure.
143 if failure, sets the exception
144*/
145static int
146parse_internal_render_format_spec(PyObject *format_spec,
147 Py_ssize_t start, Py_ssize_t end,
148 InternalFormatSpec *format,
149 char default_type,
150 char default_align)
151{
152 Py_ssize_t pos = start;
153 /* end-pos is used throughout this code to specify the length of
154 the input string */
155#define READ_spec(index) PyUnicode_READ_CHAR(format_spec, index)
156
157 Py_ssize_t consumed;
158 int align_specified = 0;
159
160 format->fill_char = '\0';
161 format->align = default_align;
162 format->alternate = 0;
163 format->sign = '\0';
164 format->width = -1;
165 format->thousands_separators = 0;
166 format->precision = -1;
167 format->type = default_type;
168
169 /* If the second char is an alignment token,
170 then parse the fill char */
171 if (end-pos >= 2 && is_alignment_token(READ_spec(pos+1))) {
172 format->align = READ_spec(pos+1);
173 format->fill_char = READ_spec(pos);
174 align_specified = 1;
175 pos += 2;
176 }
177 else if (end-pos >= 1 && is_alignment_token(READ_spec(pos))) {
178 format->align = READ_spec(pos);
179 align_specified = 1;
180 ++pos;
181 }
182
183 /* Parse the various sign options */
184 if (end-pos >= 1 && is_sign_element(READ_spec(pos))) {
185 format->sign = READ_spec(pos);
186 ++pos;
187 }
188
189 /* If the next character is #, we're in alternate mode. This only
190 applies to integers. */
191 if (end-pos >= 1 && READ_spec(pos) == '#') {
192 format->alternate = 1;
193 ++pos;
194 }
195
196 /* The special case for 0-padding (backwards compat) */
197 if (format->fill_char == '\0' && end-pos >= 1 && READ_spec(pos) == '0') {
198 format->fill_char = '0';
199 if (!align_specified) {
200 format->align = '=';
201 }
202 ++pos;
203 }
204
205 consumed = get_integer(format_spec, &pos, end, &format->width);
206 if (consumed == -1)
207 /* Overflow error. Exception already set. */
208 return 0;
209
210 /* If consumed is 0, we didn't consume any characters for the
211 width. In that case, reset the width to -1, because
212 get_integer() will have set it to zero. -1 is how we record
213 that the width wasn't specified. */
214 if (consumed == 0)
215 format->width = -1;
216
217 /* Comma signifies add thousands separators */
218 if (end-pos && READ_spec(pos) == ',') {
219 format->thousands_separators = 1;
220 ++pos;
221 }
222
223 /* Parse field precision */
224 if (end-pos && READ_spec(pos) == '.') {
225 ++pos;
226
227 consumed = get_integer(format_spec, &pos, end, &format->precision);
228 if (consumed == -1)
229 /* Overflow error. Exception already set. */
230 return 0;
231
232 /* Not having a precision after a dot is an error. */
233 if (consumed == 0) {
234 PyErr_Format(PyExc_ValueError,
235 "Format specifier missing precision");
236 return 0;
237 }
238
239 }
240
241 /* Finally, parse the type field. */
242
243 if (end-pos > 1) {
Eric V. Smithd25cfe62012-01-19 20:04:28 -0500244 /* More than one char remain, invalid format specifier. */
245 PyErr_Format(PyExc_ValueError, "Invalid format specifier");
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200246 return 0;
247 }
248
249 if (end-pos == 1) {
250 format->type = READ_spec(pos);
251 ++pos;
252 }
253
254 /* Do as much validating as we can, just by looking at the format
255 specifier. Do not take into account what type of formatting
256 we're doing (int, float, string). */
257
258 if (format->thousands_separators) {
259 switch (format->type) {
260 case 'd':
261 case 'e':
262 case 'f':
263 case 'g':
264 case 'E':
265 case 'G':
266 case '%':
267 case 'F':
268 case '\0':
269 /* These are allowed. See PEP 378.*/
270 break;
271 default:
272 invalid_comma_type(format->type);
273 return 0;
274 }
275 }
276
Victor Stinnera4ac6002012-01-21 15:50:49 +0100277 assert (format->align <= 127);
278 assert (format->sign <= 127);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200279 return 1;
280}
281
282/* Calculate the padding needed. */
283static void
284calc_padding(Py_ssize_t nchars, Py_ssize_t width, Py_UCS4 align,
285 Py_ssize_t *n_lpadding, Py_ssize_t *n_rpadding,
286 Py_ssize_t *n_total)
287{
288 if (width >= 0) {
289 if (nchars > width)
290 *n_total = nchars;
291 else
292 *n_total = width;
293 }
294 else {
295 /* not specified, use all of the chars and no more */
296 *n_total = nchars;
297 }
298
299 /* Figure out how much leading space we need, based on the
300 aligning */
301 if (align == '>')
302 *n_lpadding = *n_total - nchars;
303 else if (align == '^')
304 *n_lpadding = (*n_total - nchars) / 2;
305 else if (align == '<' || align == '=')
306 *n_lpadding = 0;
307 else {
308 /* We should never have an unspecified alignment. */
309 *n_lpadding = 0;
310 assert(0);
311 }
312
313 *n_rpadding = *n_total - nchars - *n_lpadding;
314}
315
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200316/* Do the padding, and return a pointer to where the caller-supplied
317 content goes. */
318static Py_ssize_t
319fill_padding(PyObject *s, Py_ssize_t start, Py_ssize_t nchars,
320 Py_UCS4 fill_char, Py_ssize_t n_lpadding,
321 Py_ssize_t n_rpadding)
322{
323 /* Pad on left. */
324 if (n_lpadding)
Victor Stinner3fe55312012-01-04 00:33:50 +0100325 PyUnicode_Fill(s, start, start + n_lpadding, fill_char);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200326
327 /* Pad on right. */
328 if (n_rpadding)
Victor Stinner3fe55312012-01-04 00:33:50 +0100329 PyUnicode_Fill(s, start + nchars + n_lpadding,
330 start + nchars + n_lpadding + n_rpadding, fill_char);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200331
332 /* Pointer to the user content. */
333 return start + n_lpadding;
334}
335
336/************************************************************************/
337/*********** common routines for numeric formatting *********************/
338/************************************************************************/
339
/* Locale type codes, passed as the "type" argument of
   get_locale_info(). */
#define LT_CURRENT_LOCALE 0     /* take the values from localeconv() */
#define LT_DEFAULT_LOCALE 1     /* hard-coded: "." decimal point, ","
                                   separator, groups of 3 */
#define LT_NO_LOCALE 2          /* "." decimal point, no grouping */
344
/* Locale info needed for formatting integers and the part of floats
   before and including the decimal.  Note that locales only support
   8-bit chars, not unicode.  Filled in by get_locale_info(). */
typedef struct {
    char *decimal_point;    /* string written in place of the '.' */
    char *thousands_sep;    /* string inserted between digit groups */
    char *grouping;         /* group sizes, in the same encoding as the
                               `grouping` member of struct lconv */
} LocaleInfo;
353
/* Describes the layout of a formatted number; see the comment in
   calc_number_widths() for the order in which the parts appear. */
typedef struct {
    Py_ssize_t n_lpadding;  /* fill characters before the sign */
    Py_ssize_t n_prefix;    /* characters in the prefix (e.g. '0x') */
    Py_ssize_t n_spadding;  /* fill characters between the prefix and
                               the digits */
    Py_ssize_t n_rpadding;  /* fill characters after the number */
    char sign;              /* sign character to write, '\0' if none */
    Py_ssize_t n_sign;      /* number of characters needed for the sign
                               (0/1) */
    Py_ssize_t n_grouped_digits; /* Space taken up by the digits, including
                                    any grouping chars. */
    Py_ssize_t n_decimal;   /* 0 if only an integer */
    Py_ssize_t n_remainder; /* Digits in decimal and/or exponent part,
                               excluding the decimal itself, if
                               present. */

    /* These 2 are not the widths of fields, but are needed when the
       digits are written with _PyUnicode_InsertThousandsGrouping(). */
    Py_ssize_t n_digits;    /* The number of digits before a decimal
                               or exponent. */
    Py_ssize_t n_min_width; /* The min_width we used when we computed
                               the n_grouped_digits width. */
} NumberFieldWidths;
377
378
379/* Given a number of the form:
380 digits[remainder]
381 where ptr points to the start and end points to the end, find where
382 the integer part ends. This could be a decimal, an exponent, both,
383 or neither.
384 If a decimal point is present, set *has_decimal and increment
385 remainder beyond it.
386 Results are undefined (but shouldn't crash) for improperly
387 formatted strings.
388*/
389static void
390parse_number(PyObject *s, Py_ssize_t pos, Py_ssize_t end,
391 Py_ssize_t *n_remainder, int *has_decimal)
392{
393 Py_ssize_t remainder;
394
395 while (pos<end && isdigit(PyUnicode_READ_CHAR(s, pos)))
396 ++pos;
397 remainder = pos;
398
399 /* Does remainder start with a decimal point? */
400 *has_decimal = pos<end && PyUnicode_READ_CHAR(s, remainder) == '.';
401
402 /* Skip the decimal point. */
403 if (*has_decimal)
404 remainder++;
405
406 *n_remainder = end - remainder;
407}
408
409/* not all fields of format are used. for example, precision is
410 unused. should this take discrete params in order to be more clear
411 about what it does? or is passing a single format parameter easier
412 and more efficient enough to justify a little obfuscation? */
413static Py_ssize_t
414calc_number_widths(NumberFieldWidths *spec, Py_ssize_t n_prefix,
415 Py_UCS4 sign_char, PyObject *number, Py_ssize_t n_start,
416 Py_ssize_t n_end, Py_ssize_t n_remainder,
417 int has_decimal, const LocaleInfo *locale,
418 const InternalFormatSpec *format)
419{
420 Py_ssize_t n_non_digit_non_padding;
421 Py_ssize_t n_padding;
422
423 spec->n_digits = n_end - n_start - n_remainder - (has_decimal?1:0);
424 spec->n_lpadding = 0;
425 spec->n_prefix = n_prefix;
426 spec->n_decimal = has_decimal ? strlen(locale->decimal_point) : 0;
427 spec->n_remainder = n_remainder;
428 spec->n_spadding = 0;
429 spec->n_rpadding = 0;
430 spec->sign = '\0';
431 spec->n_sign = 0;
432
433 /* the output will look like:
434 | |
435 | <lpadding> <sign> <prefix> <spadding> <grouped_digits> <decimal> <remainder> <rpadding> |
436 | |
437
438 sign is computed from format->sign and the actual
439 sign of the number
440
441 prefix is given (it's for the '0x' prefix)
442
443 digits is already known
444
445 the total width is either given, or computed from the
446 actual digits
447
448 only one of lpadding, spadding, and rpadding can be non-zero,
449 and it's calculated from the width and other fields
450 */
451
452 /* compute the various parts we're going to write */
453 switch (format->sign) {
454 case '+':
455 /* always put a + or - */
456 spec->n_sign = 1;
457 spec->sign = (sign_char == '-' ? '-' : '+');
458 break;
459 case ' ':
460 spec->n_sign = 1;
461 spec->sign = (sign_char == '-' ? '-' : ' ');
462 break;
463 default:
464 /* Not specified, or the default (-) */
465 if (sign_char == '-') {
466 spec->n_sign = 1;
467 spec->sign = '-';
468 }
469 }
470
471 /* The number of chars used for non-digits and non-padding. */
472 n_non_digit_non_padding = spec->n_sign + spec->n_prefix + spec->n_decimal +
473 spec->n_remainder;
474
475 /* min_width can go negative, that's okay. format->width == -1 means
476 we don't care. */
477 if (format->fill_char == '0' && format->align == '=')
478 spec->n_min_width = format->width - n_non_digit_non_padding;
479 else
480 spec->n_min_width = 0;
481
482 if (spec->n_digits == 0)
483 /* This case only occurs when using 'c' formatting, we need
484 to special case it because the grouping code always wants
485 to have at least one character. */
486 spec->n_grouped_digits = 0;
487 else
488 spec->n_grouped_digits = _PyUnicode_InsertThousandsGrouping(
Victor Stinnerc3cec782011-10-05 21:24:08 +0200489 NULL, PyUnicode_1BYTE_KIND, NULL, 0, NULL,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200490 spec->n_digits, spec->n_min_width,
491 locale->grouping, locale->thousands_sep);
492
493 /* Given the desired width and the total of digit and non-digit
494 space we consume, see if we need any padding. format->width can
495 be negative (meaning no padding), but this code still works in
496 that case. */
497 n_padding = format->width -
498 (n_non_digit_non_padding + spec->n_grouped_digits);
499 if (n_padding > 0) {
500 /* Some padding is needed. Determine if it's left, space, or right. */
501 switch (format->align) {
502 case '<':
503 spec->n_rpadding = n_padding;
504 break;
505 case '^':
506 spec->n_lpadding = n_padding / 2;
507 spec->n_rpadding = n_padding - spec->n_lpadding;
508 break;
509 case '=':
510 spec->n_spadding = n_padding;
511 break;
512 case '>':
513 spec->n_lpadding = n_padding;
514 break;
515 default:
516 /* Shouldn't get here, but treat it as '>' */
517 spec->n_lpadding = n_padding;
518 assert(0);
519 break;
520 }
521 }
522 return spec->n_lpadding + spec->n_sign + spec->n_prefix +
523 spec->n_spadding + spec->n_grouped_digits + spec->n_decimal +
524 spec->n_remainder + spec->n_rpadding;
525}
526
527/* Fill in the digit parts of a numbers's string representation,
528 as determined in calc_number_widths().
Victor Stinnerafbaa202011-09-28 21:50:16 +0200529 Return -1 on error, or 0 on success. */
530static int
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200531fill_number(PyObject *out, Py_ssize_t pos, const NumberFieldWidths *spec,
532 PyObject *digits, Py_ssize_t d_start, Py_ssize_t d_end,
Victor Stinnerafbaa202011-09-28 21:50:16 +0200533 PyObject *prefix, Py_ssize_t p_start,
534 Py_UCS4 fill_char,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200535 LocaleInfo *locale, int toupper)
536{
537 /* Used to keep track of digits, decimal, and remainder. */
538 Py_ssize_t d_pos = d_start;
539 unsigned int kind = PyUnicode_KIND(out);
540 void *data = PyUnicode_DATA(out);
541
542#ifndef NDEBUG
543 Py_ssize_t r;
544#endif
545
546 if (spec->n_lpadding) {
Victor Stinner3fe55312012-01-04 00:33:50 +0100547 PyUnicode_Fill(out, pos, pos + spec->n_lpadding, fill_char);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200548 pos += spec->n_lpadding;
549 }
550 if (spec->n_sign == 1) {
551 PyUnicode_WRITE(kind, data, pos++, spec->sign);
552 }
553 if (spec->n_prefix) {
Victor Stinnerfd85c3a2011-09-28 21:53:49 +0200554 if (PyUnicode_CopyCharacters(out, pos,
555 prefix, p_start,
556 spec->n_prefix) < 0)
557 return -1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200558 if (toupper) {
559 Py_ssize_t t;
Benjamin Peterson21e0da22012-01-11 21:00:42 -0500560 for (t = 0; t < spec->n_prefix; t++) {
561 Py_UCS4 c = PyUnicode_READ(kind, data, pos + t);
Victor Stinnera4ac6002012-01-21 15:50:49 +0100562 assert (c <= 127);
Benjamin Peterson21e0da22012-01-11 21:00:42 -0500563 PyUnicode_WRITE(kind, data, pos + t, Py_TOUPPER(c));
564 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200565 }
566 pos += spec->n_prefix;
567 }
568 if (spec->n_spadding) {
Victor Stinner3fe55312012-01-04 00:33:50 +0100569 PyUnicode_Fill(out, pos, pos + spec->n_spadding, fill_char);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200570 pos += spec->n_spadding;
571 }
572
573 /* Only for type 'c' special case, it has no digits. */
574 if (spec->n_digits != 0) {
575 /* Fill the digits with InsertThousandsGrouping. */
Victor Stinnerdba2dee2011-09-28 21:50:42 +0200576 char *pdigits;
577 if (PyUnicode_READY(digits))
578 return -1;
579 pdigits = PyUnicode_DATA(digits);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200580 if (PyUnicode_KIND(digits) < kind) {
581 pdigits = _PyUnicode_AsKind(digits, kind);
Victor Stinnerafbaa202011-09-28 21:50:16 +0200582 if (pdigits == NULL)
583 return -1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200584 }
585#ifndef NDEBUG
586 r =
587#endif
588 _PyUnicode_InsertThousandsGrouping(
Victor Stinnerc3cec782011-10-05 21:24:08 +0200589 out, kind,
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200590 (char*)data + kind * pos,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200591 spec->n_grouped_digits,
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200592 pdigits + kind * d_pos,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200593 spec->n_digits, spec->n_min_width,
594 locale->grouping, locale->thousands_sep);
595#ifndef NDEBUG
596 assert(r == spec->n_grouped_digits);
597#endif
598 if (PyUnicode_KIND(digits) < kind)
599 PyMem_Free(pdigits);
600 d_pos += spec->n_digits;
601 }
602 if (toupper) {
603 Py_ssize_t t;
Benjamin Peterson21e0da22012-01-11 21:00:42 -0500604 for (t = 0; t < spec->n_grouped_digits; t++) {
605 Py_UCS4 c = PyUnicode_READ(kind, data, pos + t);
606 if (c > 127) {
607 PyErr_SetString(PyExc_SystemError, "non-ascii grouped digit");
608 return -1;
609 }
610 PyUnicode_WRITE(kind, data, pos + t, Py_TOUPPER(c));
611 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200612 }
613 pos += spec->n_grouped_digits;
614
615 if (spec->n_decimal) {
616 Py_ssize_t t;
617 for (t = 0; t < spec->n_decimal; ++t)
618 PyUnicode_WRITE(kind, data, pos + t,
619 locale->decimal_point[t]);
620 pos += spec->n_decimal;
621 d_pos += 1;
622 }
623
624 if (spec->n_remainder) {
Victor Stinnerfd85c3a2011-09-28 21:53:49 +0200625 if (PyUnicode_CopyCharacters(out, pos, digits, d_pos, spec->n_remainder) < 0)
626 return -1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200627 pos += spec->n_remainder;
628 d_pos += spec->n_remainder;
629 }
630
631 if (spec->n_rpadding) {
Victor Stinner3fe55312012-01-04 00:33:50 +0100632 PyUnicode_Fill(out, pos, pos + spec->n_rpadding, fill_char);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200633 pos += spec->n_rpadding;
634 }
Victor Stinnerafbaa202011-09-28 21:50:16 +0200635 return 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200636}
637
638static char no_grouping[1] = {CHAR_MAX};
639
640/* Find the decimal point character(s?), thousands_separator(s?), and
641 grouping description, either for the current locale if type is
642 LT_CURRENT_LOCALE, a hard-coded locale if LT_DEFAULT_LOCALE, or
643 none if LT_NO_LOCALE. */
644static void
645get_locale_info(int type, LocaleInfo *locale_info)
646{
647 switch (type) {
648 case LT_CURRENT_LOCALE: {
649 struct lconv *locale_data = localeconv();
650 locale_info->decimal_point = locale_data->decimal_point;
651 locale_info->thousands_sep = locale_data->thousands_sep;
652 locale_info->grouping = locale_data->grouping;
653 break;
654 }
655 case LT_DEFAULT_LOCALE:
656 locale_info->decimal_point = ".";
657 locale_info->thousands_sep = ",";
658 locale_info->grouping = "\3"; /* Group every 3 characters. The
659 (implicit) trailing 0 means repeat
660 infinitely. */
661 break;
662 case LT_NO_LOCALE:
663 locale_info->decimal_point = ".";
664 locale_info->thousands_sep = "";
665 locale_info->grouping = no_grouping;
666 break;
667 default:
668 assert(0);
669 }
670}
671
672/************************************************************************/
673/*********** string formatting ******************************************/
674/************************************************************************/
675
676static PyObject *
677format_string_internal(PyObject *value, const InternalFormatSpec *format)
678{
679 Py_ssize_t lpad;
680 Py_ssize_t rpad;
681 Py_ssize_t total;
682 Py_ssize_t pos;
Victor Stinnerc4f281e2011-10-11 22:11:42 +0200683 Py_ssize_t len = PyUnicode_GET_LENGTH(value);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200684 PyObject *result = NULL;
Amaury Forgeot d'Arccd27df32012-01-23 22:42:19 +0100685 Py_UCS4 maxchar = 127;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200686
687 /* sign is not allowed on strings */
688 if (format->sign != '\0') {
689 PyErr_SetString(PyExc_ValueError,
690 "Sign not allowed in string format specifier");
691 goto done;
692 }
693
694 /* alternate is not allowed on strings */
695 if (format->alternate) {
696 PyErr_SetString(PyExc_ValueError,
697 "Alternate form (#) not allowed in string format "
698 "specifier");
699 goto done;
700 }
701
702 /* '=' alignment not allowed on strings */
703 if (format->align == '=') {
704 PyErr_SetString(PyExc_ValueError,
705 "'=' alignment not allowed "
706 "in string format specifier");
707 goto done;
708 }
709
710 /* if precision is specified, output no more that format.precision
711 characters */
712 if (format->precision >= 0 && len >= format->precision) {
713 len = format->precision;
714 }
715
716 calc_padding(len, format->width, format->align, &lpad, &rpad, &total);
717
Victor Stinnera4ac6002012-01-21 15:50:49 +0100718 if (lpad != 0 || rpad != 0)
719 maxchar = Py_MAX(maxchar, format->fill_char);
720
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200721 /* allocate the resulting string */
722 result = PyUnicode_New(total, maxchar);
723 if (result == NULL)
724 goto done;
725
726 /* Write into that space. First the padding. */
727 pos = fill_padding(result, 0, len,
728 format->fill_char=='\0'?' ':format->fill_char,
729 lpad, rpad);
730
731 /* Then the source string. */
Victor Stinnerfd85c3a2011-09-28 21:53:49 +0200732 if (PyUnicode_CopyCharacters(result, pos, value, 0, len) < 0)
733 Py_CLEAR(result);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200734
735done:
736 return result;
737}
738
739
740/************************************************************************/
741/*********** long formatting ********************************************/
742/************************************************************************/
743
744typedef PyObject*
745(*IntOrLongToString)(PyObject *value, int base);
746
747static PyObject *
748format_int_or_long_internal(PyObject *value, const InternalFormatSpec *format,
749 IntOrLongToString tostring)
750{
751 PyObject *result = NULL;
Amaury Forgeot d'Arccd27df32012-01-23 22:42:19 +0100752 Py_UCS4 maxchar = 127;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200753 PyObject *tmp = NULL;
754 Py_ssize_t inumeric_chars;
755 Py_UCS4 sign_char = '\0';
756 Py_ssize_t n_digits; /* count of digits need from the computed
757 string */
758 Py_ssize_t n_remainder = 0; /* Used only for 'c' formatting, which
759 produces non-digits */
760 Py_ssize_t n_prefix = 0; /* Count of prefix chars, (e.g., '0x') */
761 Py_ssize_t n_total;
762 Py_ssize_t prefix;
763 NumberFieldWidths spec;
764 long x;
Victor Stinnerafbaa202011-09-28 21:50:16 +0200765 int err;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200766
767 /* Locale settings, either from the actual locale or
768 from a hard-code pseudo-locale */
769 LocaleInfo locale;
770
771 /* no precision allowed on integers */
772 if (format->precision != -1) {
773 PyErr_SetString(PyExc_ValueError,
774 "Precision not allowed in integer format specifier");
775 goto done;
776 }
777
778 /* special case for character formatting */
779 if (format->type == 'c') {
780 /* error to specify a sign */
781 if (format->sign != '\0') {
782 PyErr_SetString(PyExc_ValueError,
783 "Sign not allowed with integer"
784 " format specifier 'c'");
785 goto done;
786 }
787
788 /* taken from unicodeobject.c formatchar() */
789 /* Integer input truncated to a character */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200790 x = PyLong_AsLong(value);
791 if (x == -1 && PyErr_Occurred())
792 goto done;
793 if (x < 0 || x > 0x10ffff) {
794 PyErr_SetString(PyExc_OverflowError,
Victor Stinnera4ac6002012-01-21 15:50:49 +0100795 "%c arg not in range(0x110000)");
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200796 goto done;
797 }
798 tmp = PyUnicode_FromOrdinal(x);
799 inumeric_chars = 0;
800 n_digits = 1;
Victor Stinnera4ac6002012-01-21 15:50:49 +0100801 maxchar = Py_MAX(maxchar, x);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200802
803 /* As a sort-of hack, we tell calc_number_widths that we only
804 have "remainder" characters. calc_number_widths thinks
805 these are characters that don't get formatted, only copied
806 into the output string. We do this for 'c' formatting,
807 because the characters are likely to be non-digits. */
808 n_remainder = 1;
809 }
810 else {
811 int base;
812 int leading_chars_to_skip = 0; /* Number of characters added by
813 PyNumber_ToBase that we want to
814 skip over. */
815
816 /* Compute the base and how many characters will be added by
817 PyNumber_ToBase */
818 switch (format->type) {
819 case 'b':
820 base = 2;
821 leading_chars_to_skip = 2; /* 0b */
822 break;
823 case 'o':
824 base = 8;
825 leading_chars_to_skip = 2; /* 0o */
826 break;
827 case 'x':
828 case 'X':
829 base = 16;
830 leading_chars_to_skip = 2; /* 0x */
831 break;
832 default: /* shouldn't be needed, but stops a compiler warning */
833 case 'd':
834 case 'n':
835 base = 10;
836 break;
837 }
838
839 /* The number of prefix chars is the same as the leading
840 chars to skip */
841 if (format->alternate)
842 n_prefix = leading_chars_to_skip;
843
844 /* Do the hard part, converting to a string in a given base */
845 tmp = tostring(value, base);
846 if (tmp == NULL || PyUnicode_READY(tmp) == -1)
847 goto done;
848
849 inumeric_chars = 0;
850 n_digits = PyUnicode_GET_LENGTH(tmp);
851
852 prefix = inumeric_chars;
853
854 /* Is a sign character present in the output? If so, remember it
855 and skip it */
856 if (PyUnicode_READ_CHAR(tmp, inumeric_chars) == '-') {
857 sign_char = '-';
858 ++prefix;
859 ++leading_chars_to_skip;
860 }
861
862 /* Skip over the leading chars (0x, 0b, etc.) */
863 n_digits -= leading_chars_to_skip;
864 inumeric_chars += leading_chars_to_skip;
865 }
866
867 /* Determine the grouping, separator, and decimal point, if any. */
868 get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
869 (format->thousands_separators ?
870 LT_DEFAULT_LOCALE :
871 LT_NO_LOCALE),
872 &locale);
873
874 /* Calculate how much memory we'll need. */
875 n_total = calc_number_widths(&spec, n_prefix, sign_char, tmp, inumeric_chars,
876 inumeric_chars + n_digits, n_remainder, 0, &locale, format);
877
Victor Stinnera4ac6002012-01-21 15:50:49 +0100878 if (spec.n_lpadding || spec.n_spadding || spec.n_rpadding)
879 maxchar = Py_MAX(maxchar, format->fill_char);
880
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200881 /* Allocate the memory. */
882 result = PyUnicode_New(n_total, maxchar);
883 if (!result)
884 goto done;
885
886 /* Populate the memory. */
Victor Stinnerafbaa202011-09-28 21:50:16 +0200887 err = fill_number(result, 0, &spec,
888 tmp, inumeric_chars, inumeric_chars + n_digits,
889 tmp, prefix,
890 format->fill_char == '\0' ? ' ' : format->fill_char,
891 &locale, format->type == 'X');
892 if (err)
893 Py_CLEAR(result);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200894
895done:
896 Py_XDECREF(tmp);
897 return result;
898}
899
900/************************************************************************/
901/*********** float formatting *******************************************/
902/************************************************************************/
903
904static PyObject*
905strtounicode(char *charbuffer, Py_ssize_t len)
906{
907 return PyUnicode_FromKindAndData(PyUnicode_1BYTE_KIND, charbuffer, len);
908}
909
910/* much of this is taken from unicodeobject.c */
911static PyObject *
912format_float_internal(PyObject *value,
913 const InternalFormatSpec *format)
914{
915 char *buf = NULL; /* buffer returned from PyOS_double_to_string */
916 Py_ssize_t n_digits;
917 Py_ssize_t n_remainder;
918 Py_ssize_t n_total;
919 int has_decimal;
920 double val;
921 Py_ssize_t precision = format->precision;
922 Py_ssize_t default_precision = 6;
923 Py_UCS4 type = format->type;
924 int add_pct = 0;
925 Py_ssize_t index;
926 NumberFieldWidths spec;
927 int flags = 0;
928 PyObject *result = NULL;
Amaury Forgeot d'Arccd27df32012-01-23 22:42:19 +0100929 Py_UCS4 maxchar = 127;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200930 Py_UCS4 sign_char = '\0';
931 int float_type; /* Used to see if we have a nan, inf, or regular float. */
932 PyObject *unicode_tmp = NULL;
Victor Stinnerafbaa202011-09-28 21:50:16 +0200933 int err;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200934
935 /* Locale settings, either from the actual locale or
936 from a hard-code pseudo-locale */
937 LocaleInfo locale;
938
939 if (format->alternate)
940 flags |= Py_DTSF_ALT;
941
942 if (type == '\0') {
943 /* Omitted type specifier. Behaves in the same way as repr(x)
944 and str(x) if no precision is given, else like 'g', but with
945 at least one digit after the decimal point. */
946 flags |= Py_DTSF_ADD_DOT_0;
947 type = 'r';
948 default_precision = 0;
949 }
950
951 if (type == 'n')
952 /* 'n' is the same as 'g', except for the locale used to
953 format the result. We take care of that later. */
954 type = 'g';
955
956 val = PyFloat_AsDouble(value);
957 if (val == -1.0 && PyErr_Occurred())
958 goto done;
959
960 if (type == '%') {
961 type = 'f';
962 val *= 100;
963 add_pct = 1;
964 }
965
966 if (precision < 0)
967 precision = default_precision;
968 else if (type == 'r')
969 type = 'g';
970
971 /* Cast "type", because if we're in unicode we need to pass a
972 8-bit char. This is safe, because we've restricted what "type"
973 can be. */
974 buf = PyOS_double_to_string(val, (char)type, precision, flags,
975 &float_type);
976 if (buf == NULL)
977 goto done;
978 n_digits = strlen(buf);
979
980 if (add_pct) {
981 /* We know that buf has a trailing zero (since we just called
982 strlen() on it), and we don't use that fact any more. So we
983 can just write over the trailing zero. */
984 buf[n_digits] = '%';
985 n_digits += 1;
986 }
987
988 /* Since there is no unicode version of PyOS_double_to_string,
989 just use the 8 bit version and then convert to unicode. */
990 unicode_tmp = strtounicode(buf, n_digits);
991 if (unicode_tmp == NULL)
992 goto done;
993 index = 0;
994
995 /* Is a sign character present in the output? If so, remember it
996 and skip it */
997 if (PyUnicode_READ_CHAR(unicode_tmp, index) == '-') {
998 sign_char = '-';
999 ++index;
1000 --n_digits;
1001 }
1002
1003 /* Determine if we have any "remainder" (after the digits, might include
1004 decimal or exponent or both (or neither)) */
1005 parse_number(unicode_tmp, index, index + n_digits, &n_remainder, &has_decimal);
1006
1007 /* Determine the grouping, separator, and decimal point, if any. */
1008 get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
1009 (format->thousands_separators ?
1010 LT_DEFAULT_LOCALE :
1011 LT_NO_LOCALE),
1012 &locale);
1013
1014 /* Calculate how much memory we'll need. */
Victor Stinnerafbaa202011-09-28 21:50:16 +02001015 n_total = calc_number_widths(&spec, 0, sign_char, unicode_tmp, index,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001016 index + n_digits, n_remainder, has_decimal,
1017 &locale, format);
1018
Victor Stinnera4ac6002012-01-21 15:50:49 +01001019 if (spec.n_lpadding || spec.n_spadding || spec.n_rpadding)
1020 maxchar = Py_MAX(maxchar, format->fill_char);
1021
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001022 /* Allocate the memory. */
1023 result = PyUnicode_New(n_total, maxchar);
1024 if (result == NULL)
1025 goto done;
1026
1027 /* Populate the memory. */
Victor Stinnerafbaa202011-09-28 21:50:16 +02001028 err = fill_number(result, 0, &spec,
1029 unicode_tmp, index, index + n_digits,
1030 NULL, 0,
1031 format->fill_char == '\0' ? ' ' : format->fill_char,
1032 &locale, 0);
1033 if (err)
1034 Py_CLEAR(result);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001035
1036done:
1037 PyMem_Free(buf);
1038 Py_DECREF(unicode_tmp);
1039 return result;
1040}
1041
/************************************************************************/
/*********** complex formatting *****************************************/
/************************************************************************/
1045
1046static PyObject *
1047format_complex_internal(PyObject *value,
1048 const InternalFormatSpec *format)
1049{
1050 double re;
1051 double im;
1052 char *re_buf = NULL; /* buffer returned from PyOS_double_to_string */
1053 char *im_buf = NULL; /* buffer returned from PyOS_double_to_string */
1054
1055 InternalFormatSpec tmp_format = *format;
1056 Py_ssize_t n_re_digits;
1057 Py_ssize_t n_im_digits;
1058 Py_ssize_t n_re_remainder;
1059 Py_ssize_t n_im_remainder;
1060 Py_ssize_t n_re_total;
1061 Py_ssize_t n_im_total;
1062 int re_has_decimal;
1063 int im_has_decimal;
1064 Py_ssize_t precision = format->precision;
1065 Py_ssize_t default_precision = 6;
1066 Py_UCS4 type = format->type;
1067 Py_ssize_t i_re;
1068 Py_ssize_t i_im;
1069 NumberFieldWidths re_spec;
1070 NumberFieldWidths im_spec;
1071 int flags = 0;
1072 PyObject *result = NULL;
Amaury Forgeot d'Arccd27df32012-01-23 22:42:19 +01001073 Py_UCS4 maxchar = 127;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001074 int rkind;
1075 void *rdata;
1076 Py_ssize_t index;
1077 Py_UCS4 re_sign_char = '\0';
1078 Py_UCS4 im_sign_char = '\0';
1079 int re_float_type; /* Used to see if we have a nan, inf, or regular float. */
1080 int im_float_type;
1081 int add_parens = 0;
1082 int skip_re = 0;
1083 Py_ssize_t lpad;
1084 Py_ssize_t rpad;
1085 Py_ssize_t total;
1086 PyObject *re_unicode_tmp = NULL;
1087 PyObject *im_unicode_tmp = NULL;
Victor Stinnerafbaa202011-09-28 21:50:16 +02001088 int err;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001089
1090 /* Locale settings, either from the actual locale or
1091 from a hard-code pseudo-locale */
1092 LocaleInfo locale;
1093
1094 /* Zero padding is not allowed. */
1095 if (format->fill_char == '0') {
1096 PyErr_SetString(PyExc_ValueError,
1097 "Zero padding is not allowed in complex format "
1098 "specifier");
1099 goto done;
1100 }
1101
1102 /* Neither is '=' alignment . */
1103 if (format->align == '=') {
1104 PyErr_SetString(PyExc_ValueError,
1105 "'=' alignment flag is not allowed in complex format "
1106 "specifier");
1107 goto done;
1108 }
1109
1110 re = PyComplex_RealAsDouble(value);
1111 if (re == -1.0 && PyErr_Occurred())
1112 goto done;
1113 im = PyComplex_ImagAsDouble(value);
1114 if (im == -1.0 && PyErr_Occurred())
1115 goto done;
1116
1117 if (format->alternate)
1118 flags |= Py_DTSF_ALT;
1119
1120 if (type == '\0') {
1121 /* Omitted type specifier. Should be like str(self). */
1122 type = 'r';
1123 default_precision = 0;
1124 if (re == 0.0 && copysign(1.0, re) == 1.0)
1125 skip_re = 1;
1126 else
1127 add_parens = 1;
1128 }
1129
1130 if (type == 'n')
1131 /* 'n' is the same as 'g', except for the locale used to
1132 format the result. We take care of that later. */
1133 type = 'g';
1134
1135 if (precision < 0)
1136 precision = default_precision;
1137 else if (type == 'r')
1138 type = 'g';
1139
1140 /* Cast "type", because if we're in unicode we need to pass a
1141 8-bit char. This is safe, because we've restricted what "type"
1142 can be. */
1143 re_buf = PyOS_double_to_string(re, (char)type, precision, flags,
1144 &re_float_type);
1145 if (re_buf == NULL)
1146 goto done;
1147 im_buf = PyOS_double_to_string(im, (char)type, precision, flags,
1148 &im_float_type);
1149 if (im_buf == NULL)
1150 goto done;
1151
1152 n_re_digits = strlen(re_buf);
1153 n_im_digits = strlen(im_buf);
1154
1155 /* Since there is no unicode version of PyOS_double_to_string,
1156 just use the 8 bit version and then convert to unicode. */
1157 re_unicode_tmp = strtounicode(re_buf, n_re_digits);
1158 if (re_unicode_tmp == NULL)
1159 goto done;
1160 i_re = 0;
1161
1162 im_unicode_tmp = strtounicode(im_buf, n_im_digits);
1163 if (im_unicode_tmp == NULL)
1164 goto done;
1165 i_im = 0;
1166
1167 /* Is a sign character present in the output? If so, remember it
1168 and skip it */
1169 if (PyUnicode_READ_CHAR(re_unicode_tmp, i_re) == '-') {
1170 re_sign_char = '-';
1171 ++i_re;
1172 --n_re_digits;
1173 }
1174 if (PyUnicode_READ_CHAR(im_unicode_tmp, i_im) == '-') {
1175 im_sign_char = '-';
1176 ++i_im;
1177 --n_im_digits;
1178 }
1179
1180 /* Determine if we have any "remainder" (after the digits, might include
1181 decimal or exponent or both (or neither)) */
Victor Stinnerafbaa202011-09-28 21:50:16 +02001182 parse_number(re_unicode_tmp, i_re, i_re + n_re_digits,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001183 &n_re_remainder, &re_has_decimal);
Victor Stinnerafbaa202011-09-28 21:50:16 +02001184 parse_number(im_unicode_tmp, i_im, i_im + n_im_digits,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001185 &n_im_remainder, &im_has_decimal);
1186
1187 /* Determine the grouping, separator, and decimal point, if any. */
1188 get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
1189 (format->thousands_separators ?
1190 LT_DEFAULT_LOCALE :
1191 LT_NO_LOCALE),
1192 &locale);
1193
1194 /* Turn off any padding. We'll do it later after we've composed
1195 the numbers without padding. */
1196 tmp_format.fill_char = '\0';
1197 tmp_format.align = '<';
1198 tmp_format.width = -1;
1199
1200 /* Calculate how much memory we'll need. */
1201 n_re_total = calc_number_widths(&re_spec, 0, re_sign_char, re_unicode_tmp,
1202 i_re, i_re + n_re_digits, n_re_remainder,
1203 re_has_decimal, &locale, &tmp_format);
1204
1205 /* Same formatting, but always include a sign, unless the real part is
1206 * going to be omitted, in which case we use whatever sign convention was
1207 * requested by the original format. */
1208 if (!skip_re)
1209 tmp_format.sign = '+';
1210 n_im_total = calc_number_widths(&im_spec, 0, im_sign_char, im_unicode_tmp,
1211 i_im, i_im + n_im_digits, n_im_remainder,
1212 im_has_decimal, &locale, &tmp_format);
1213
1214 if (skip_re)
1215 n_re_total = 0;
1216
1217 /* Add 1 for the 'j', and optionally 2 for parens. */
1218 calc_padding(n_re_total + n_im_total + 1 + add_parens * 2,
1219 format->width, format->align, &lpad, &rpad, &total);
1220
Victor Stinnera4ac6002012-01-21 15:50:49 +01001221 if (re_spec.n_lpadding || re_spec.n_spadding || re_spec.n_rpadding
1222 || im_spec.n_lpadding || im_spec.n_spadding || im_spec.n_rpadding
1223 || lpad || rpad)
1224 maxchar = Py_MAX(maxchar, format->fill_char);
1225
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001226 result = PyUnicode_New(total, maxchar);
1227 if (result == NULL)
1228 goto done;
1229 rkind = PyUnicode_KIND(result);
1230 rdata = PyUnicode_DATA(result);
1231
1232 /* Populate the memory. First, the padding. */
1233 index = fill_padding(result, 0,
1234 n_re_total + n_im_total + 1 + add_parens * 2,
1235 format->fill_char=='\0' ? ' ' : format->fill_char,
1236 lpad, rpad);
1237
1238 if (add_parens)
1239 PyUnicode_WRITE(rkind, rdata, index++, '(');
1240
1241 if (!skip_re) {
Victor Stinnerafbaa202011-09-28 21:50:16 +02001242 err = fill_number(result, index, &re_spec,
1243 re_unicode_tmp, i_re, i_re + n_re_digits,
1244 NULL, 0,
1245 0,
1246 &locale, 0);
1247 if (err) {
1248 Py_CLEAR(result);
1249 goto done;
1250 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001251 index += n_re_total;
1252 }
Victor Stinnerafbaa202011-09-28 21:50:16 +02001253 err = fill_number(result, index, &im_spec,
1254 im_unicode_tmp, i_im, i_im + n_im_digits,
1255 NULL, 0,
1256 0,
1257 &locale, 0);
1258 if (err) {
1259 Py_CLEAR(result);
1260 goto done;
1261 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001262 index += n_im_total;
1263 PyUnicode_WRITE(rkind, rdata, index++, 'j');
1264
1265 if (add_parens)
1266 PyUnicode_WRITE(rkind, rdata, index++, ')');
1267
1268done:
1269 PyMem_Free(re_buf);
1270 PyMem_Free(im_buf);
1271 Py_XDECREF(re_unicode_tmp);
1272 Py_XDECREF(im_unicode_tmp);
1273 return result;
1274}
1275
/************************************************************************/
/*********** built in formatters ****************************************/
/************************************************************************/
1279PyObject *
1280_PyUnicode_FormatAdvanced(PyObject *obj,
1281 PyObject *format_spec,
1282 Py_ssize_t start, Py_ssize_t end)
1283{
1284 InternalFormatSpec format;
Victor Stinnerfb9ea8c2011-10-06 01:45:57 +02001285 PyObject *result;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001286
1287 /* check for the special case of zero length format spec, make
1288 it equivalent to str(obj) */
Victor Stinnerfb9ea8c2011-10-06 01:45:57 +02001289 if (start == end)
1290 return PyObject_Str(obj);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001291
1292 /* parse the format_spec */
1293 if (!parse_internal_render_format_spec(format_spec, start, end,
1294 &format, 's', '<'))
Victor Stinnerfb9ea8c2011-10-06 01:45:57 +02001295 return NULL;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001296
1297 /* type conversion? */
1298 switch (format.type) {
1299 case 's':
1300 /* no type conversion needed, already a string. do the formatting */
1301 result = format_string_internal(obj, &format);
Victor Stinnerfb9ea8c2011-10-06 01:45:57 +02001302 if (result != NULL)
1303 assert(_PyUnicode_CheckConsistency(result, 1));
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001304 break;
1305 default:
1306 /* unknown */
1307 unknown_presentation_type(format.type, obj->ob_type->tp_name);
Victor Stinnerfb9ea8c2011-10-06 01:45:57 +02001308 result = NULL;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001309 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001310 return result;
1311}
1312
1313static PyObject*
1314format_int_or_long(PyObject* obj, PyObject* format_spec,
1315 Py_ssize_t start, Py_ssize_t end,
1316 IntOrLongToString tostring)
1317{
1318 PyObject *result = NULL;
1319 PyObject *tmp = NULL;
1320 InternalFormatSpec format;
1321
1322 /* check for the special case of zero length format spec, make
1323 it equivalent to str(obj) */
1324 if (start == end) {
1325 result = PyObject_Str(obj);
1326 goto done;
1327 }
1328
1329 /* parse the format_spec */
1330 if (!parse_internal_render_format_spec(format_spec, start, end,
1331 &format, 'd', '>'))
1332 goto done;
1333
1334 /* type conversion? */
1335 switch (format.type) {
1336 case 'b':
1337 case 'c':
1338 case 'd':
1339 case 'o':
1340 case 'x':
1341 case 'X':
1342 case 'n':
1343 /* no type conversion needed, already an int (or long). do
1344 the formatting */
1345 result = format_int_or_long_internal(obj, &format, tostring);
1346 break;
1347
1348 case 'e':
1349 case 'E':
1350 case 'f':
1351 case 'F':
1352 case 'g':
1353 case 'G':
1354 case '%':
1355 /* convert to float */
1356 tmp = PyNumber_Float(obj);
1357 if (tmp == NULL)
1358 goto done;
1359 result = format_float_internal(tmp, &format);
1360 break;
1361
1362 default:
1363 /* unknown */
1364 unknown_presentation_type(format.type, obj->ob_type->tp_name);
1365 goto done;
1366 }
1367
1368done:
1369 Py_XDECREF(tmp);
1370 return result;
1371}
1372
1373/* Need to define long_format as a function that will convert a long
1374 to a string. In 3.0, _PyLong_Format has the correct signature. */
1375#define long_format _PyLong_Format
1376
1377PyObject *
1378_PyLong_FormatAdvanced(PyObject *obj,
1379 PyObject *format_spec,
1380 Py_ssize_t start, Py_ssize_t end)
1381{
1382 return format_int_or_long(obj, format_spec, start, end,
1383 long_format);
1384}
1385
1386PyObject *
1387_PyFloat_FormatAdvanced(PyObject *obj,
1388 PyObject *format_spec,
1389 Py_ssize_t start, Py_ssize_t end)
1390{
1391 PyObject *result = NULL;
1392 InternalFormatSpec format;
1393
1394 /* check for the special case of zero length format spec, make
1395 it equivalent to str(obj) */
1396 if (start == end) {
1397 result = PyObject_Str(obj);
1398 goto done;
1399 }
1400
1401 /* parse the format_spec */
1402 if (!parse_internal_render_format_spec(format_spec, start, end,
1403 &format, '\0', '>'))
1404 goto done;
1405
1406 /* type conversion? */
1407 switch (format.type) {
1408 case '\0': /* No format code: like 'g', but with at least one decimal. */
1409 case 'e':
1410 case 'E':
1411 case 'f':
1412 case 'F':
1413 case 'g':
1414 case 'G':
1415 case 'n':
1416 case '%':
1417 /* no conversion, already a float. do the formatting */
1418 result = format_float_internal(obj, &format);
1419 break;
1420
1421 default:
1422 /* unknown */
1423 unknown_presentation_type(format.type, obj->ob_type->tp_name);
1424 goto done;
1425 }
1426
1427done:
1428 return result;
1429}
1430
1431PyObject *
1432_PyComplex_FormatAdvanced(PyObject *obj,
1433 PyObject *format_spec,
1434 Py_ssize_t start, Py_ssize_t end)
1435{
1436 PyObject *result = NULL;
1437 InternalFormatSpec format;
1438
1439 /* check for the special case of zero length format spec, make
1440 it equivalent to str(obj) */
1441 if (start == end) {
1442 result = PyObject_Str(obj);
1443 goto done;
1444 }
1445
1446 /* parse the format_spec */
1447 if (!parse_internal_render_format_spec(format_spec, start, end,
1448 &format, '\0', '>'))
1449 goto done;
1450
1451 /* type conversion? */
1452 switch (format.type) {
1453 case '\0': /* No format code: like 'g', but with at least one decimal. */
1454 case 'e':
1455 case 'E':
1456 case 'f':
1457 case 'F':
1458 case 'g':
1459 case 'G':
1460 case 'n':
1461 /* no conversion, already a complex. do the formatting */
1462 result = format_complex_internal(obj, &format);
1463 break;
1464
1465 default:
1466 /* unknown */
1467 unknown_presentation_type(format.type, obj->ob_type->tp_name);
1468 goto done;
1469 }
1470
1471done:
1472 return result;
1473}