blob: e1c00df9e6bda267a1f7f768ef14e3208dc5c847 [file] [log] [blame]
/* implements the unicode (as opposed to string) version of the
   built-in formatters for string, int, float.  that is, the versions
   of int.__format__, etc., that take and return unicode objects */
4
5#include "Python.h"
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02006#include <locale.h>
7
8/* Raises an exception about an unknown presentation type for this
9 * type. */
10
11static void
12unknown_presentation_type(Py_UCS4 presentation_type,
13 const char* type_name)
14{
15 /* %c might be out-of-range, hence the two cases. */
16 if (presentation_type > 32 && presentation_type < 128)
17 PyErr_Format(PyExc_ValueError,
18 "Unknown format code '%c' "
19 "for object of type '%.200s'",
20 (char)presentation_type,
21 type_name);
22 else
23 PyErr_Format(PyExc_ValueError,
24 "Unknown format code '\\x%x' "
25 "for object of type '%.200s'",
26 (unsigned int)presentation_type,
27 type_name);
28}
29
30static void
31invalid_comma_type(Py_UCS4 presentation_type)
32{
33 if (presentation_type > 32 && presentation_type < 128)
34 PyErr_Format(PyExc_ValueError,
35 "Cannot specify ',' with '%c'.",
36 (char)presentation_type);
37 else
38 PyErr_Format(PyExc_ValueError,
39 "Cannot specify ',' with '\\x%x'.",
40 (unsigned int)presentation_type);
41}
42
43/*
44 get_integer consumes 0 or more decimal digit characters from an
45 input string, updates *result with the corresponding positive
46 integer, and returns the number of digits consumed.
47
48 returns -1 on error.
49*/
50static int
51get_integer(PyObject *str, Py_ssize_t *pos, Py_ssize_t end,
52 Py_ssize_t *result)
53{
Mark Dickinson47862d42011-12-01 15:27:04 +000054 Py_ssize_t accumulator, digitval;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020055 int numdigits;
56 accumulator = numdigits = 0;
57 for (;;(*pos)++, numdigits++) {
58 if (*pos >= end)
59 break;
60 digitval = Py_UNICODE_TODECIMAL(PyUnicode_READ_CHAR(str, *pos));
61 if (digitval < 0)
62 break;
63 /*
Mark Dickinson47862d42011-12-01 15:27:04 +000064 Detect possible overflow before it happens:
65
66 accumulator * 10 + digitval > PY_SSIZE_T_MAX if and only if
67 accumulator > (PY_SSIZE_T_MAX - digitval) / 10.
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020068 */
Mark Dickinson47862d42011-12-01 15:27:04 +000069 if (accumulator > (PY_SSIZE_T_MAX - digitval) / 10) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020070 PyErr_Format(PyExc_ValueError,
71 "Too many decimal digits in format string");
72 return -1;
73 }
Mark Dickinson47862d42011-12-01 15:27:04 +000074 accumulator = accumulator * 10 + digitval;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020075 }
76 *result = accumulator;
77 return numdigits;
78}
79
80/************************************************************************/
81/*********** standard format specifier parsing **************************/
82/************************************************************************/
83
84/* returns true if this character is a specifier alignment token */
85Py_LOCAL_INLINE(int)
86is_alignment_token(Py_UCS4 c)
87{
88 switch (c) {
89 case '<': case '>': case '=': case '^':
90 return 1;
91 default:
92 return 0;
93 }
94}
95
96/* returns true if this character is a sign element */
97Py_LOCAL_INLINE(int)
98is_sign_element(Py_UCS4 c)
99{
100 switch (c) {
101 case ' ': case '+': case '-':
102 return 1;
103 default:
104 return 0;
105 }
106}
Eric Smith8c663262007-08-25 02:26:07 +0000107
Eric Smith4a7d76d2008-05-30 18:10:19 +0000108
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200109typedef struct {
110 Py_UCS4 fill_char;
111 Py_UCS4 align;
112 int alternate;
113 Py_UCS4 sign;
114 Py_ssize_t width;
115 int thousands_separators;
116 Py_ssize_t precision;
117 Py_UCS4 type;
118} InternalFormatSpec;
Eric Smith4a7d76d2008-05-30 18:10:19 +0000119
#if 0
/* Occasionally useful for debugging: dumps every field of a parsed
   format spec to stdout.  Compiled out by default. */
static void
DEBUG_PRINT_FORMAT_SPEC(InternalFormatSpec *format)
{
    /* Fill, alignment and flags. */
    printf("internal format spec: fill_char %d\n", format->fill_char);
    printf("internal format spec: align %d\n", format->align);
    printf("internal format spec: alternate %d\n", format->alternate);
    printf("internal format spec: sign %d\n", format->sign);

    /* Sizes. */
    printf("internal format spec: width %zd\n", format->width);
    printf("internal format spec: thousands_separators %d\n",
           format->thousands_separators);
    printf("internal format spec: precision %zd\n", format->precision);

    /* Presentation type, followed by a blank separator line. */
    printf("internal format spec: type %c\n", format->type);
    printf("\n");
}
#endif
137
138
139/*
140 ptr points to the start of the format_spec, end points just past its end.
141 fills in format with the parsed information.
142 returns 1 on success, 0 on failure.
143 if failure, sets the exception
144*/
145static int
146parse_internal_render_format_spec(PyObject *format_spec,
147 Py_ssize_t start, Py_ssize_t end,
148 InternalFormatSpec *format,
149 char default_type,
150 char default_align)
151{
152 Py_ssize_t pos = start;
153 /* end-pos is used throughout this code to specify the length of
154 the input string */
155#define READ_spec(index) PyUnicode_READ_CHAR(format_spec, index)
156
157 Py_ssize_t consumed;
158 int align_specified = 0;
159
160 format->fill_char = '\0';
161 format->align = default_align;
162 format->alternate = 0;
163 format->sign = '\0';
164 format->width = -1;
165 format->thousands_separators = 0;
166 format->precision = -1;
167 format->type = default_type;
168
169 /* If the second char is an alignment token,
170 then parse the fill char */
171 if (end-pos >= 2 && is_alignment_token(READ_spec(pos+1))) {
172 format->align = READ_spec(pos+1);
173 format->fill_char = READ_spec(pos);
174 align_specified = 1;
175 pos += 2;
176 }
177 else if (end-pos >= 1 && is_alignment_token(READ_spec(pos))) {
178 format->align = READ_spec(pos);
179 align_specified = 1;
180 ++pos;
181 }
182
183 /* Parse the various sign options */
184 if (end-pos >= 1 && is_sign_element(READ_spec(pos))) {
185 format->sign = READ_spec(pos);
186 ++pos;
187 }
188
189 /* If the next character is #, we're in alternate mode. This only
190 applies to integers. */
191 if (end-pos >= 1 && READ_spec(pos) == '#') {
192 format->alternate = 1;
193 ++pos;
194 }
195
196 /* The special case for 0-padding (backwards compat) */
197 if (format->fill_char == '\0' && end-pos >= 1 && READ_spec(pos) == '0') {
198 format->fill_char = '0';
199 if (!align_specified) {
200 format->align = '=';
201 }
202 ++pos;
203 }
204
205 consumed = get_integer(format_spec, &pos, end, &format->width);
206 if (consumed == -1)
207 /* Overflow error. Exception already set. */
208 return 0;
209
210 /* If consumed is 0, we didn't consume any characters for the
211 width. In that case, reset the width to -1, because
212 get_integer() will have set it to zero. -1 is how we record
213 that the width wasn't specified. */
214 if (consumed == 0)
215 format->width = -1;
216
217 /* Comma signifies add thousands separators */
218 if (end-pos && READ_spec(pos) == ',') {
219 format->thousands_separators = 1;
220 ++pos;
221 }
222
223 /* Parse field precision */
224 if (end-pos && READ_spec(pos) == '.') {
225 ++pos;
226
227 consumed = get_integer(format_spec, &pos, end, &format->precision);
228 if (consumed == -1)
229 /* Overflow error. Exception already set. */
230 return 0;
231
232 /* Not having a precision after a dot is an error. */
233 if (consumed == 0) {
234 PyErr_Format(PyExc_ValueError,
235 "Format specifier missing precision");
236 return 0;
237 }
238
239 }
240
241 /* Finally, parse the type field. */
242
243 if (end-pos > 1) {
Eric V. Smithd25cfe62012-01-19 20:04:28 -0500244 /* More than one char remain, invalid format specifier. */
245 PyErr_Format(PyExc_ValueError, "Invalid format specifier");
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200246 return 0;
247 }
248
249 if (end-pos == 1) {
250 format->type = READ_spec(pos);
251 ++pos;
252 }
253
254 /* Do as much validating as we can, just by looking at the format
255 specifier. Do not take into account what type of formatting
256 we're doing (int, float, string). */
257
258 if (format->thousands_separators) {
259 switch (format->type) {
260 case 'd':
261 case 'e':
262 case 'f':
263 case 'g':
264 case 'E':
265 case 'G':
266 case '%':
267 case 'F':
268 case '\0':
269 /* These are allowed. See PEP 378.*/
270 break;
271 default:
272 invalid_comma_type(format->type);
273 return 0;
274 }
275 }
276
Victor Stinnera4ac6002012-01-21 15:50:49 +0100277 assert (format->align <= 127);
278 assert (format->sign <= 127);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200279 return 1;
280}
281
282/* Calculate the padding needed. */
283static void
284calc_padding(Py_ssize_t nchars, Py_ssize_t width, Py_UCS4 align,
285 Py_ssize_t *n_lpadding, Py_ssize_t *n_rpadding,
286 Py_ssize_t *n_total)
287{
288 if (width >= 0) {
289 if (nchars > width)
290 *n_total = nchars;
291 else
292 *n_total = width;
293 }
294 else {
295 /* not specified, use all of the chars and no more */
296 *n_total = nchars;
297 }
298
299 /* Figure out how much leading space we need, based on the
300 aligning */
301 if (align == '>')
302 *n_lpadding = *n_total - nchars;
303 else if (align == '^')
304 *n_lpadding = (*n_total - nchars) / 2;
305 else if (align == '<' || align == '=')
306 *n_lpadding = 0;
307 else {
308 /* We should never have an unspecified alignment. */
309 *n_lpadding = 0;
310 assert(0);
311 }
312
313 *n_rpadding = *n_total - nchars - *n_lpadding;
314}
315
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200316/* Do the padding, and return a pointer to where the caller-supplied
317 content goes. */
318static Py_ssize_t
319fill_padding(PyObject *s, Py_ssize_t start, Py_ssize_t nchars,
320 Py_UCS4 fill_char, Py_ssize_t n_lpadding,
321 Py_ssize_t n_rpadding)
322{
323 /* Pad on left. */
324 if (n_lpadding)
Victor Stinner3fe55312012-01-04 00:33:50 +0100325 PyUnicode_Fill(s, start, start + n_lpadding, fill_char);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200326
327 /* Pad on right. */
328 if (n_rpadding)
Victor Stinner3fe55312012-01-04 00:33:50 +0100329 PyUnicode_Fill(s, start + nchars + n_lpadding,
330 start + nchars + n_lpadding + n_rpadding, fill_char);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200331
332 /* Pointer to the user content. */
333 return start + n_lpadding;
334}
335
336/************************************************************************/
337/*********** common routines for numeric formatting *********************/
338/************************************************************************/
339
340/* Locale type codes. */
341#define LT_CURRENT_LOCALE 0
342#define LT_DEFAULT_LOCALE 1
343#define LT_NO_LOCALE 2
344
345/* Locale info needed for formatting integers and the part of floats
346 before and including the decimal. Note that locales only support
347 8-bit chars, not unicode. */
348typedef struct {
Victor Stinner41a863c2012-02-24 00:37:51 +0100349 PyObject *decimal_point;
350 PyObject *thousands_sep;
351 const char *grouping;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200352} LocaleInfo;
353
Victor Stinner41a863c2012-02-24 00:37:51 +0100354#define STATIC_LOCALE_INFO_INIT {0, 0, 0}
355
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200356/* describes the layout for an integer, see the comment in
357 calc_number_widths() for details */
358typedef struct {
359 Py_ssize_t n_lpadding;
360 Py_ssize_t n_prefix;
361 Py_ssize_t n_spadding;
362 Py_ssize_t n_rpadding;
363 char sign;
364 Py_ssize_t n_sign; /* number of digits needed for sign (0/1) */
365 Py_ssize_t n_grouped_digits; /* Space taken up by the digits, including
366 any grouping chars. */
367 Py_ssize_t n_decimal; /* 0 if only an integer */
368 Py_ssize_t n_remainder; /* Digits in decimal and/or exponent part,
369 excluding the decimal itself, if
370 present. */
371
372 /* These 2 are not the widths of fields, but are needed by
373 STRINGLIB_GROUPING. */
374 Py_ssize_t n_digits; /* The number of digits before a decimal
375 or exponent. */
376 Py_ssize_t n_min_width; /* The min_width we used when we computed
377 the n_grouped_digits width. */
378} NumberFieldWidths;
379
380
381/* Given a number of the form:
382 digits[remainder]
383 where ptr points to the start and end points to the end, find where
384 the integer part ends. This could be a decimal, an exponent, both,
385 or neither.
386 If a decimal point is present, set *has_decimal and increment
387 remainder beyond it.
388 Results are undefined (but shouldn't crash) for improperly
389 formatted strings.
390*/
391static void
392parse_number(PyObject *s, Py_ssize_t pos, Py_ssize_t end,
393 Py_ssize_t *n_remainder, int *has_decimal)
394{
395 Py_ssize_t remainder;
396
397 while (pos<end && isdigit(PyUnicode_READ_CHAR(s, pos)))
398 ++pos;
399 remainder = pos;
400
401 /* Does remainder start with a decimal point? */
402 *has_decimal = pos<end && PyUnicode_READ_CHAR(s, remainder) == '.';
403
404 /* Skip the decimal point. */
405 if (*has_decimal)
406 remainder++;
407
408 *n_remainder = end - remainder;
409}
410
411/* not all fields of format are used. for example, precision is
412 unused. should this take discrete params in order to be more clear
413 about what it does? or is passing a single format parameter easier
414 and more efficient enough to justify a little obfuscation? */
415static Py_ssize_t
416calc_number_widths(NumberFieldWidths *spec, Py_ssize_t n_prefix,
417 Py_UCS4 sign_char, PyObject *number, Py_ssize_t n_start,
418 Py_ssize_t n_end, Py_ssize_t n_remainder,
419 int has_decimal, const LocaleInfo *locale,
Victor Stinner41a863c2012-02-24 00:37:51 +0100420 const InternalFormatSpec *format, Py_UCS4 *maxchar)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200421{
422 Py_ssize_t n_non_digit_non_padding;
423 Py_ssize_t n_padding;
424
425 spec->n_digits = n_end - n_start - n_remainder - (has_decimal?1:0);
426 spec->n_lpadding = 0;
427 spec->n_prefix = n_prefix;
Victor Stinner41a863c2012-02-24 00:37:51 +0100428 spec->n_decimal = has_decimal ? PyUnicode_GET_LENGTH(locale->decimal_point) : 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200429 spec->n_remainder = n_remainder;
430 spec->n_spadding = 0;
431 spec->n_rpadding = 0;
432 spec->sign = '\0';
433 spec->n_sign = 0;
434
435 /* the output will look like:
436 | |
437 | <lpadding> <sign> <prefix> <spadding> <grouped_digits> <decimal> <remainder> <rpadding> |
438 | |
439
440 sign is computed from format->sign and the actual
441 sign of the number
442
443 prefix is given (it's for the '0x' prefix)
444
445 digits is already known
446
447 the total width is either given, or computed from the
448 actual digits
449
450 only one of lpadding, spadding, and rpadding can be non-zero,
451 and it's calculated from the width and other fields
452 */
453
454 /* compute the various parts we're going to write */
455 switch (format->sign) {
456 case '+':
457 /* always put a + or - */
458 spec->n_sign = 1;
459 spec->sign = (sign_char == '-' ? '-' : '+');
460 break;
461 case ' ':
462 spec->n_sign = 1;
463 spec->sign = (sign_char == '-' ? '-' : ' ');
464 break;
465 default:
466 /* Not specified, or the default (-) */
467 if (sign_char == '-') {
468 spec->n_sign = 1;
469 spec->sign = '-';
470 }
471 }
472
473 /* The number of chars used for non-digits and non-padding. */
474 n_non_digit_non_padding = spec->n_sign + spec->n_prefix + spec->n_decimal +
475 spec->n_remainder;
476
477 /* min_width can go negative, that's okay. format->width == -1 means
478 we don't care. */
479 if (format->fill_char == '0' && format->align == '=')
480 spec->n_min_width = format->width - n_non_digit_non_padding;
481 else
482 spec->n_min_width = 0;
483
484 if (spec->n_digits == 0)
485 /* This case only occurs when using 'c' formatting, we need
486 to special case it because the grouping code always wants
487 to have at least one character. */
488 spec->n_grouped_digits = 0;
Victor Stinner41a863c2012-02-24 00:37:51 +0100489 else {
490 Py_UCS4 grouping_maxchar;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200491 spec->n_grouped_digits = _PyUnicode_InsertThousandsGrouping(
Victor Stinner41a863c2012-02-24 00:37:51 +0100492 NULL, 0,
493 0, NULL,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200494 spec->n_digits, spec->n_min_width,
Victor Stinner41a863c2012-02-24 00:37:51 +0100495 locale->grouping, locale->thousands_sep, &grouping_maxchar);
496 *maxchar = Py_MAX(*maxchar, grouping_maxchar);
497 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200498
499 /* Given the desired width and the total of digit and non-digit
500 space we consume, see if we need any padding. format->width can
501 be negative (meaning no padding), but this code still works in
502 that case. */
503 n_padding = format->width -
504 (n_non_digit_non_padding + spec->n_grouped_digits);
505 if (n_padding > 0) {
506 /* Some padding is needed. Determine if it's left, space, or right. */
507 switch (format->align) {
508 case '<':
509 spec->n_rpadding = n_padding;
510 break;
511 case '^':
512 spec->n_lpadding = n_padding / 2;
513 spec->n_rpadding = n_padding - spec->n_lpadding;
514 break;
515 case '=':
516 spec->n_spadding = n_padding;
517 break;
518 case '>':
519 spec->n_lpadding = n_padding;
520 break;
521 default:
522 /* Shouldn't get here, but treat it as '>' */
523 spec->n_lpadding = n_padding;
524 assert(0);
525 break;
526 }
527 }
Victor Stinner41a863c2012-02-24 00:37:51 +0100528
529 if (spec->n_lpadding || spec->n_spadding || spec->n_rpadding)
530 *maxchar = Py_MAX(*maxchar, format->fill_char);
531
Victor Stinner90f50d42012-02-24 01:44:47 +0100532 if (spec->n_decimal)
533 *maxchar = Py_MAX(*maxchar, PyUnicode_MAX_CHAR_VALUE(locale->decimal_point));
534
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200535 return spec->n_lpadding + spec->n_sign + spec->n_prefix +
536 spec->n_spadding + spec->n_grouped_digits + spec->n_decimal +
537 spec->n_remainder + spec->n_rpadding;
538}
539
540/* Fill in the digit parts of a numbers's string representation,
541 as determined in calc_number_widths().
Victor Stinnerafbaa202011-09-28 21:50:16 +0200542 Return -1 on error, or 0 on success. */
543static int
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200544fill_number(PyObject *out, Py_ssize_t pos, const NumberFieldWidths *spec,
545 PyObject *digits, Py_ssize_t d_start, Py_ssize_t d_end,
Victor Stinnerafbaa202011-09-28 21:50:16 +0200546 PyObject *prefix, Py_ssize_t p_start,
547 Py_UCS4 fill_char,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200548 LocaleInfo *locale, int toupper)
549{
550 /* Used to keep track of digits, decimal, and remainder. */
551 Py_ssize_t d_pos = d_start;
552 unsigned int kind = PyUnicode_KIND(out);
553 void *data = PyUnicode_DATA(out);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200554 Py_ssize_t r;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200555
556 if (spec->n_lpadding) {
Victor Stinner3fe55312012-01-04 00:33:50 +0100557 PyUnicode_Fill(out, pos, pos + spec->n_lpadding, fill_char);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200558 pos += spec->n_lpadding;
559 }
560 if (spec->n_sign == 1) {
561 PyUnicode_WRITE(kind, data, pos++, spec->sign);
562 }
563 if (spec->n_prefix) {
Victor Stinnerfd85c3a2011-09-28 21:53:49 +0200564 if (PyUnicode_CopyCharacters(out, pos,
565 prefix, p_start,
566 spec->n_prefix) < 0)
567 return -1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200568 if (toupper) {
569 Py_ssize_t t;
Benjamin Peterson21e0da22012-01-11 21:00:42 -0500570 for (t = 0; t < spec->n_prefix; t++) {
571 Py_UCS4 c = PyUnicode_READ(kind, data, pos + t);
Victor Stinnered277852012-02-01 00:22:23 +0100572 c = Py_TOUPPER(c);
Victor Stinnera4ac6002012-01-21 15:50:49 +0100573 assert (c <= 127);
Victor Stinnered277852012-02-01 00:22:23 +0100574 PyUnicode_WRITE(kind, data, pos + t, c);
Benjamin Peterson21e0da22012-01-11 21:00:42 -0500575 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200576 }
577 pos += spec->n_prefix;
578 }
579 if (spec->n_spadding) {
Victor Stinner3fe55312012-01-04 00:33:50 +0100580 PyUnicode_Fill(out, pos, pos + spec->n_spadding, fill_char);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200581 pos += spec->n_spadding;
582 }
583
584 /* Only for type 'c' special case, it has no digits. */
585 if (spec->n_digits != 0) {
586 /* Fill the digits with InsertThousandsGrouping. */
Victor Stinnerdba2dee2011-09-28 21:50:42 +0200587 char *pdigits;
588 if (PyUnicode_READY(digits))
589 return -1;
590 pdigits = PyUnicode_DATA(digits);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200591 if (PyUnicode_KIND(digits) < kind) {
592 pdigits = _PyUnicode_AsKind(digits, kind);
Victor Stinnerafbaa202011-09-28 21:50:16 +0200593 if (pdigits == NULL)
594 return -1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200595 }
Victor Stinner90f50d42012-02-24 01:44:47 +0100596 r = _PyUnicode_InsertThousandsGrouping(
Victor Stinner41a863c2012-02-24 00:37:51 +0100597 out, pos,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200598 spec->n_grouped_digits,
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200599 pdigits + kind * d_pos,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200600 spec->n_digits, spec->n_min_width,
Victor Stinner41a863c2012-02-24 00:37:51 +0100601 locale->grouping, locale->thousands_sep, NULL);
Victor Stinner90f50d42012-02-24 01:44:47 +0100602 if (r == -1)
603 return -1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200604 assert(r == spec->n_grouped_digits);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200605 if (PyUnicode_KIND(digits) < kind)
606 PyMem_Free(pdigits);
607 d_pos += spec->n_digits;
608 }
609 if (toupper) {
610 Py_ssize_t t;
Benjamin Peterson21e0da22012-01-11 21:00:42 -0500611 for (t = 0; t < spec->n_grouped_digits; t++) {
612 Py_UCS4 c = PyUnicode_READ(kind, data, pos + t);
Victor Stinnered277852012-02-01 00:22:23 +0100613 c = Py_TOUPPER(c);
Benjamin Peterson21e0da22012-01-11 21:00:42 -0500614 if (c > 127) {
615 PyErr_SetString(PyExc_SystemError, "non-ascii grouped digit");
616 return -1;
617 }
Victor Stinnered277852012-02-01 00:22:23 +0100618 PyUnicode_WRITE(kind, data, pos + t, c);
Benjamin Peterson21e0da22012-01-11 21:00:42 -0500619 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200620 }
621 pos += spec->n_grouped_digits;
622
623 if (spec->n_decimal) {
Victor Stinner41a863c2012-02-24 00:37:51 +0100624 if (PyUnicode_CopyCharacters(out, pos, locale->decimal_point, 0, spec->n_decimal) < 0)
625 return -1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200626 pos += spec->n_decimal;
627 d_pos += 1;
628 }
629
630 if (spec->n_remainder) {
Victor Stinnerfd85c3a2011-09-28 21:53:49 +0200631 if (PyUnicode_CopyCharacters(out, pos, digits, d_pos, spec->n_remainder) < 0)
632 return -1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200633 pos += spec->n_remainder;
634 d_pos += spec->n_remainder;
635 }
636
637 if (spec->n_rpadding) {
Victor Stinner3fe55312012-01-04 00:33:50 +0100638 PyUnicode_Fill(out, pos, pos + spec->n_rpadding, fill_char);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200639 pos += spec->n_rpadding;
640 }
Victor Stinnerafbaa202011-09-28 21:50:16 +0200641 return 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200642}
643
644static char no_grouping[1] = {CHAR_MAX};
645
646/* Find the decimal point character(s?), thousands_separator(s?), and
647 grouping description, either for the current locale if type is
648 LT_CURRENT_LOCALE, a hard-coded locale if LT_DEFAULT_LOCALE, or
649 none if LT_NO_LOCALE. */
Victor Stinner41a863c2012-02-24 00:37:51 +0100650static int
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200651get_locale_info(int type, LocaleInfo *locale_info)
652{
653 switch (type) {
654 case LT_CURRENT_LOCALE: {
655 struct lconv *locale_data = localeconv();
Victor Stinner41a863c2012-02-24 00:37:51 +0100656 locale_info->decimal_point = PyUnicode_DecodeLocale(
657 locale_data->decimal_point,
658 NULL);
659 if (locale_info->decimal_point == NULL)
660 return -1;
661 locale_info->thousands_sep = PyUnicode_DecodeLocale(
662 locale_data->thousands_sep,
663 NULL);
664 if (locale_info->thousands_sep == NULL) {
665 Py_DECREF(locale_info->decimal_point);
666 return -1;
667 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200668 locale_info->grouping = locale_data->grouping;
669 break;
670 }
671 case LT_DEFAULT_LOCALE:
Victor Stinner41a863c2012-02-24 00:37:51 +0100672 locale_info->decimal_point = PyUnicode_FromOrdinal('.');
673 locale_info->thousands_sep = PyUnicode_FromOrdinal(',');
674 if (!locale_info->decimal_point || !locale_info->thousands_sep) {
675 Py_XDECREF(locale_info->decimal_point);
676 Py_XDECREF(locale_info->thousands_sep);
677 return -1;
678 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200679 locale_info->grouping = "\3"; /* Group every 3 characters. The
680 (implicit) trailing 0 means repeat
681 infinitely. */
682 break;
683 case LT_NO_LOCALE:
Victor Stinner41a863c2012-02-24 00:37:51 +0100684 locale_info->decimal_point = PyUnicode_FromOrdinal('.');
685 locale_info->thousands_sep = PyUnicode_New(0, 0);
686 if (!locale_info->decimal_point || !locale_info->thousands_sep) {
687 Py_XDECREF(locale_info->decimal_point);
688 Py_XDECREF(locale_info->thousands_sep);
689 return -1;
690 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200691 locale_info->grouping = no_grouping;
692 break;
693 default:
694 assert(0);
695 }
Victor Stinner41a863c2012-02-24 00:37:51 +0100696 return 0;
697}
698
699static void
700free_locale_info(LocaleInfo *locale_info)
701{
702 Py_XDECREF(locale_info->decimal_point);
703 Py_XDECREF(locale_info->thousands_sep);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200704}
705
706/************************************************************************/
707/*********** string formatting ******************************************/
708/************************************************************************/
709
710static PyObject *
711format_string_internal(PyObject *value, const InternalFormatSpec *format)
712{
713 Py_ssize_t lpad;
714 Py_ssize_t rpad;
715 Py_ssize_t total;
716 Py_ssize_t pos;
Victor Stinnerc4f281e2011-10-11 22:11:42 +0200717 Py_ssize_t len = PyUnicode_GET_LENGTH(value);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200718 PyObject *result = NULL;
Victor Stinnerece58de2012-04-23 23:36:38 +0200719 Py_UCS4 maxchar;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200720
721 /* sign is not allowed on strings */
722 if (format->sign != '\0') {
723 PyErr_SetString(PyExc_ValueError,
724 "Sign not allowed in string format specifier");
725 goto done;
726 }
727
728 /* alternate is not allowed on strings */
729 if (format->alternate) {
730 PyErr_SetString(PyExc_ValueError,
731 "Alternate form (#) not allowed in string format "
732 "specifier");
733 goto done;
734 }
735
736 /* '=' alignment not allowed on strings */
737 if (format->align == '=') {
738 PyErr_SetString(PyExc_ValueError,
739 "'=' alignment not allowed "
740 "in string format specifier");
741 goto done;
742 }
743
744 /* if precision is specified, output no more that format.precision
745 characters */
746 if (format->precision >= 0 && len >= format->precision) {
747 len = format->precision;
748 }
749
750 calc_padding(len, format->width, format->align, &lpad, &rpad, &total);
751
Victor Stinnerece58de2012-04-23 23:36:38 +0200752 maxchar = _PyUnicode_FindMaxChar(value, 0, len);
Victor Stinnera4ac6002012-01-21 15:50:49 +0100753 if (lpad != 0 || rpad != 0)
754 maxchar = Py_MAX(maxchar, format->fill_char);
755
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200756 /* allocate the resulting string */
757 result = PyUnicode_New(total, maxchar);
758 if (result == NULL)
759 goto done;
760
761 /* Write into that space. First the padding. */
762 pos = fill_padding(result, 0, len,
763 format->fill_char=='\0'?' ':format->fill_char,
764 lpad, rpad);
765
766 /* Then the source string. */
Victor Stinnerfd85c3a2011-09-28 21:53:49 +0200767 if (PyUnicode_CopyCharacters(result, pos, value, 0, len) < 0)
768 Py_CLEAR(result);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200769
770done:
Victor Stinnered277852012-02-01 00:22:23 +0100771 assert(!result || _PyUnicode_CheckConsistency(result, 1));
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200772 return result;
773}
774
775
776/************************************************************************/
777/*********** long formatting ********************************************/
778/************************************************************************/
779
780typedef PyObject*
781(*IntOrLongToString)(PyObject *value, int base);
782
783static PyObject *
784format_int_or_long_internal(PyObject *value, const InternalFormatSpec *format,
785 IntOrLongToString tostring)
786{
787 PyObject *result = NULL;
Amaury Forgeot d'Arccd27df32012-01-23 22:42:19 +0100788 Py_UCS4 maxchar = 127;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200789 PyObject *tmp = NULL;
790 Py_ssize_t inumeric_chars;
791 Py_UCS4 sign_char = '\0';
792 Py_ssize_t n_digits; /* count of digits need from the computed
793 string */
794 Py_ssize_t n_remainder = 0; /* Used only for 'c' formatting, which
795 produces non-digits */
796 Py_ssize_t n_prefix = 0; /* Count of prefix chars, (e.g., '0x') */
797 Py_ssize_t n_total;
Victor Stinnered277852012-02-01 00:22:23 +0100798 Py_ssize_t prefix = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200799 NumberFieldWidths spec;
800 long x;
Victor Stinnerafbaa202011-09-28 21:50:16 +0200801 int err;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200802
803 /* Locale settings, either from the actual locale or
804 from a hard-code pseudo-locale */
Victor Stinner41a863c2012-02-24 00:37:51 +0100805 LocaleInfo locale = STATIC_LOCALE_INFO_INIT;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200806
807 /* no precision allowed on integers */
808 if (format->precision != -1) {
809 PyErr_SetString(PyExc_ValueError,
810 "Precision not allowed in integer format specifier");
811 goto done;
812 }
813
814 /* special case for character formatting */
815 if (format->type == 'c') {
816 /* error to specify a sign */
817 if (format->sign != '\0') {
818 PyErr_SetString(PyExc_ValueError,
819 "Sign not allowed with integer"
820 " format specifier 'c'");
821 goto done;
822 }
823
824 /* taken from unicodeobject.c formatchar() */
825 /* Integer input truncated to a character */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200826 x = PyLong_AsLong(value);
827 if (x == -1 && PyErr_Occurred())
828 goto done;
829 if (x < 0 || x > 0x10ffff) {
830 PyErr_SetString(PyExc_OverflowError,
Victor Stinnera4ac6002012-01-21 15:50:49 +0100831 "%c arg not in range(0x110000)");
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200832 goto done;
833 }
834 tmp = PyUnicode_FromOrdinal(x);
835 inumeric_chars = 0;
836 n_digits = 1;
Amaury Forgeot d'Arc6d766fc2012-01-23 23:20:43 +0100837 maxchar = Py_MAX(maxchar, (Py_UCS4)x);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200838
839 /* As a sort-of hack, we tell calc_number_widths that we only
840 have "remainder" characters. calc_number_widths thinks
841 these are characters that don't get formatted, only copied
842 into the output string. We do this for 'c' formatting,
843 because the characters are likely to be non-digits. */
844 n_remainder = 1;
845 }
846 else {
847 int base;
848 int leading_chars_to_skip = 0; /* Number of characters added by
849 PyNumber_ToBase that we want to
850 skip over. */
851
852 /* Compute the base and how many characters will be added by
853 PyNumber_ToBase */
854 switch (format->type) {
855 case 'b':
856 base = 2;
857 leading_chars_to_skip = 2; /* 0b */
858 break;
859 case 'o':
860 base = 8;
861 leading_chars_to_skip = 2; /* 0o */
862 break;
863 case 'x':
864 case 'X':
865 base = 16;
866 leading_chars_to_skip = 2; /* 0x */
867 break;
868 default: /* shouldn't be needed, but stops a compiler warning */
869 case 'd':
870 case 'n':
871 base = 10;
872 break;
873 }
874
875 /* The number of prefix chars is the same as the leading
876 chars to skip */
877 if (format->alternate)
878 n_prefix = leading_chars_to_skip;
879
880 /* Do the hard part, converting to a string in a given base */
881 tmp = tostring(value, base);
882 if (tmp == NULL || PyUnicode_READY(tmp) == -1)
883 goto done;
884
885 inumeric_chars = 0;
886 n_digits = PyUnicode_GET_LENGTH(tmp);
887
888 prefix = inumeric_chars;
889
890 /* Is a sign character present in the output? If so, remember it
891 and skip it */
892 if (PyUnicode_READ_CHAR(tmp, inumeric_chars) == '-') {
893 sign_char = '-';
894 ++prefix;
895 ++leading_chars_to_skip;
896 }
897
898 /* Skip over the leading chars (0x, 0b, etc.) */
899 n_digits -= leading_chars_to_skip;
900 inumeric_chars += leading_chars_to_skip;
901 }
902
903 /* Determine the grouping, separator, and decimal point, if any. */
Victor Stinner41a863c2012-02-24 00:37:51 +0100904 if (get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
905 (format->thousands_separators ?
906 LT_DEFAULT_LOCALE :
907 LT_NO_LOCALE),
908 &locale) == -1)
909 goto done;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200910
911 /* Calculate how much memory we'll need. */
912 n_total = calc_number_widths(&spec, n_prefix, sign_char, tmp, inumeric_chars,
Victor Stinner41a863c2012-02-24 00:37:51 +0100913 inumeric_chars + n_digits, n_remainder, 0,
914 &locale, format, &maxchar);
Victor Stinnera4ac6002012-01-21 15:50:49 +0100915
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200916 /* Allocate the memory. */
917 result = PyUnicode_New(n_total, maxchar);
918 if (!result)
919 goto done;
920
921 /* Populate the memory. */
Victor Stinnerafbaa202011-09-28 21:50:16 +0200922 err = fill_number(result, 0, &spec,
923 tmp, inumeric_chars, inumeric_chars + n_digits,
924 tmp, prefix,
925 format->fill_char == '\0' ? ' ' : format->fill_char,
926 &locale, format->type == 'X');
927 if (err)
928 Py_CLEAR(result);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200929
930done:
931 Py_XDECREF(tmp);
Victor Stinner41a863c2012-02-24 00:37:51 +0100932 free_locale_info(&locale);
Victor Stinnered277852012-02-01 00:22:23 +0100933 assert(!result || _PyUnicode_CheckConsistency(result, 1));
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200934 return result;
935}
936
937/************************************************************************/
938/*********** float formatting *******************************************/
939/************************************************************************/
940
941static PyObject*
942strtounicode(char *charbuffer, Py_ssize_t len)
943{
944 return PyUnicode_FromKindAndData(PyUnicode_1BYTE_KIND, charbuffer, len);
945}
946
947/* much of this is taken from unicodeobject.c */
948static PyObject *
949format_float_internal(PyObject *value,
950 const InternalFormatSpec *format)
951{
952 char *buf = NULL; /* buffer returned from PyOS_double_to_string */
953 Py_ssize_t n_digits;
954 Py_ssize_t n_remainder;
955 Py_ssize_t n_total;
956 int has_decimal;
957 double val;
958 Py_ssize_t precision = format->precision;
959 Py_ssize_t default_precision = 6;
960 Py_UCS4 type = format->type;
961 int add_pct = 0;
962 Py_ssize_t index;
963 NumberFieldWidths spec;
964 int flags = 0;
965 PyObject *result = NULL;
Amaury Forgeot d'Arccd27df32012-01-23 22:42:19 +0100966 Py_UCS4 maxchar = 127;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200967 Py_UCS4 sign_char = '\0';
968 int float_type; /* Used to see if we have a nan, inf, or regular float. */
969 PyObject *unicode_tmp = NULL;
Victor Stinnerafbaa202011-09-28 21:50:16 +0200970 int err;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200971
972 /* Locale settings, either from the actual locale or
973 from a hard-code pseudo-locale */
Victor Stinner41a863c2012-02-24 00:37:51 +0100974 LocaleInfo locale = STATIC_LOCALE_INFO_INIT;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200975
976 if (format->alternate)
977 flags |= Py_DTSF_ALT;
978
979 if (type == '\0') {
980 /* Omitted type specifier. Behaves in the same way as repr(x)
981 and str(x) if no precision is given, else like 'g', but with
982 at least one digit after the decimal point. */
983 flags |= Py_DTSF_ADD_DOT_0;
984 type = 'r';
985 default_precision = 0;
986 }
987
988 if (type == 'n')
989 /* 'n' is the same as 'g', except for the locale used to
990 format the result. We take care of that later. */
991 type = 'g';
992
993 val = PyFloat_AsDouble(value);
994 if (val == -1.0 && PyErr_Occurred())
995 goto done;
996
997 if (type == '%') {
998 type = 'f';
999 val *= 100;
1000 add_pct = 1;
1001 }
1002
1003 if (precision < 0)
1004 precision = default_precision;
1005 else if (type == 'r')
1006 type = 'g';
1007
1008 /* Cast "type", because if we're in unicode we need to pass a
1009 8-bit char. This is safe, because we've restricted what "type"
1010 can be. */
1011 buf = PyOS_double_to_string(val, (char)type, precision, flags,
1012 &float_type);
1013 if (buf == NULL)
1014 goto done;
1015 n_digits = strlen(buf);
1016
1017 if (add_pct) {
1018 /* We know that buf has a trailing zero (since we just called
1019 strlen() on it), and we don't use that fact any more. So we
1020 can just write over the trailing zero. */
1021 buf[n_digits] = '%';
1022 n_digits += 1;
1023 }
1024
1025 /* Since there is no unicode version of PyOS_double_to_string,
1026 just use the 8 bit version and then convert to unicode. */
1027 unicode_tmp = strtounicode(buf, n_digits);
1028 if (unicode_tmp == NULL)
1029 goto done;
1030 index = 0;
1031
1032 /* Is a sign character present in the output? If so, remember it
1033 and skip it */
1034 if (PyUnicode_READ_CHAR(unicode_tmp, index) == '-') {
1035 sign_char = '-';
1036 ++index;
1037 --n_digits;
1038 }
1039
1040 /* Determine if we have any "remainder" (after the digits, might include
1041 decimal or exponent or both (or neither)) */
1042 parse_number(unicode_tmp, index, index + n_digits, &n_remainder, &has_decimal);
1043
1044 /* Determine the grouping, separator, and decimal point, if any. */
Victor Stinner41a863c2012-02-24 00:37:51 +01001045 if (get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
1046 (format->thousands_separators ?
1047 LT_DEFAULT_LOCALE :
1048 LT_NO_LOCALE),
1049 &locale) == -1)
1050 goto done;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001051
1052 /* Calculate how much memory we'll need. */
Victor Stinnerafbaa202011-09-28 21:50:16 +02001053 n_total = calc_number_widths(&spec, 0, sign_char, unicode_tmp, index,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001054 index + n_digits, n_remainder, has_decimal,
Victor Stinner41a863c2012-02-24 00:37:51 +01001055 &locale, format, &maxchar);
Victor Stinnera4ac6002012-01-21 15:50:49 +01001056
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001057 /* Allocate the memory. */
1058 result = PyUnicode_New(n_total, maxchar);
1059 if (result == NULL)
1060 goto done;
1061
1062 /* Populate the memory. */
Victor Stinnerafbaa202011-09-28 21:50:16 +02001063 err = fill_number(result, 0, &spec,
1064 unicode_tmp, index, index + n_digits,
1065 NULL, 0,
1066 format->fill_char == '\0' ? ' ' : format->fill_char,
1067 &locale, 0);
1068 if (err)
1069 Py_CLEAR(result);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001070
1071done:
1072 PyMem_Free(buf);
1073 Py_DECREF(unicode_tmp);
Victor Stinner41a863c2012-02-24 00:37:51 +01001074 free_locale_info(&locale);
Victor Stinnered277852012-02-01 00:22:23 +01001075 assert(!result || _PyUnicode_CheckConsistency(result, 1));
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001076 return result;
1077}
1078
1079/************************************************************************/
1080/*********** complex formatting *****************************************/
1081/************************************************************************/
1082
1083static PyObject *
1084format_complex_internal(PyObject *value,
1085 const InternalFormatSpec *format)
1086{
1087 double re;
1088 double im;
1089 char *re_buf = NULL; /* buffer returned from PyOS_double_to_string */
1090 char *im_buf = NULL; /* buffer returned from PyOS_double_to_string */
1091
1092 InternalFormatSpec tmp_format = *format;
1093 Py_ssize_t n_re_digits;
1094 Py_ssize_t n_im_digits;
1095 Py_ssize_t n_re_remainder;
1096 Py_ssize_t n_im_remainder;
1097 Py_ssize_t n_re_total;
1098 Py_ssize_t n_im_total;
1099 int re_has_decimal;
1100 int im_has_decimal;
1101 Py_ssize_t precision = format->precision;
1102 Py_ssize_t default_precision = 6;
1103 Py_UCS4 type = format->type;
1104 Py_ssize_t i_re;
1105 Py_ssize_t i_im;
1106 NumberFieldWidths re_spec;
1107 NumberFieldWidths im_spec;
1108 int flags = 0;
1109 PyObject *result = NULL;
Amaury Forgeot d'Arccd27df32012-01-23 22:42:19 +01001110 Py_UCS4 maxchar = 127;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001111 int rkind;
1112 void *rdata;
1113 Py_ssize_t index;
1114 Py_UCS4 re_sign_char = '\0';
1115 Py_UCS4 im_sign_char = '\0';
1116 int re_float_type; /* Used to see if we have a nan, inf, or regular float. */
1117 int im_float_type;
1118 int add_parens = 0;
1119 int skip_re = 0;
1120 Py_ssize_t lpad;
1121 Py_ssize_t rpad;
1122 Py_ssize_t total;
1123 PyObject *re_unicode_tmp = NULL;
1124 PyObject *im_unicode_tmp = NULL;
Victor Stinnerafbaa202011-09-28 21:50:16 +02001125 int err;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001126
1127 /* Locale settings, either from the actual locale or
1128 from a hard-code pseudo-locale */
Victor Stinner41a863c2012-02-24 00:37:51 +01001129 LocaleInfo locale = STATIC_LOCALE_INFO_INIT;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001130
1131 /* Zero padding is not allowed. */
1132 if (format->fill_char == '0') {
1133 PyErr_SetString(PyExc_ValueError,
1134 "Zero padding is not allowed in complex format "
1135 "specifier");
1136 goto done;
1137 }
1138
1139 /* Neither is '=' alignment . */
1140 if (format->align == '=') {
1141 PyErr_SetString(PyExc_ValueError,
1142 "'=' alignment flag is not allowed in complex format "
1143 "specifier");
1144 goto done;
1145 }
1146
1147 re = PyComplex_RealAsDouble(value);
1148 if (re == -1.0 && PyErr_Occurred())
1149 goto done;
1150 im = PyComplex_ImagAsDouble(value);
1151 if (im == -1.0 && PyErr_Occurred())
1152 goto done;
1153
1154 if (format->alternate)
1155 flags |= Py_DTSF_ALT;
1156
1157 if (type == '\0') {
1158 /* Omitted type specifier. Should be like str(self). */
1159 type = 'r';
1160 default_precision = 0;
1161 if (re == 0.0 && copysign(1.0, re) == 1.0)
1162 skip_re = 1;
1163 else
1164 add_parens = 1;
1165 }
1166
1167 if (type == 'n')
1168 /* 'n' is the same as 'g', except for the locale used to
1169 format the result. We take care of that later. */
1170 type = 'g';
1171
1172 if (precision < 0)
1173 precision = default_precision;
1174 else if (type == 'r')
1175 type = 'g';
1176
1177 /* Cast "type", because if we're in unicode we need to pass a
1178 8-bit char. This is safe, because we've restricted what "type"
1179 can be. */
1180 re_buf = PyOS_double_to_string(re, (char)type, precision, flags,
1181 &re_float_type);
1182 if (re_buf == NULL)
1183 goto done;
1184 im_buf = PyOS_double_to_string(im, (char)type, precision, flags,
1185 &im_float_type);
1186 if (im_buf == NULL)
1187 goto done;
1188
1189 n_re_digits = strlen(re_buf);
1190 n_im_digits = strlen(im_buf);
1191
1192 /* Since there is no unicode version of PyOS_double_to_string,
1193 just use the 8 bit version and then convert to unicode. */
1194 re_unicode_tmp = strtounicode(re_buf, n_re_digits);
1195 if (re_unicode_tmp == NULL)
1196 goto done;
1197 i_re = 0;
1198
1199 im_unicode_tmp = strtounicode(im_buf, n_im_digits);
1200 if (im_unicode_tmp == NULL)
1201 goto done;
1202 i_im = 0;
1203
1204 /* Is a sign character present in the output? If so, remember it
1205 and skip it */
1206 if (PyUnicode_READ_CHAR(re_unicode_tmp, i_re) == '-') {
1207 re_sign_char = '-';
1208 ++i_re;
1209 --n_re_digits;
1210 }
1211 if (PyUnicode_READ_CHAR(im_unicode_tmp, i_im) == '-') {
1212 im_sign_char = '-';
1213 ++i_im;
1214 --n_im_digits;
1215 }
1216
1217 /* Determine if we have any "remainder" (after the digits, might include
1218 decimal or exponent or both (or neither)) */
Victor Stinnerafbaa202011-09-28 21:50:16 +02001219 parse_number(re_unicode_tmp, i_re, i_re + n_re_digits,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001220 &n_re_remainder, &re_has_decimal);
Victor Stinnerafbaa202011-09-28 21:50:16 +02001221 parse_number(im_unicode_tmp, i_im, i_im + n_im_digits,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001222 &n_im_remainder, &im_has_decimal);
1223
1224 /* Determine the grouping, separator, and decimal point, if any. */
Victor Stinner41a863c2012-02-24 00:37:51 +01001225 if (get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
1226 (format->thousands_separators ?
1227 LT_DEFAULT_LOCALE :
1228 LT_NO_LOCALE),
1229 &locale) == -1)
1230 goto done;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001231
1232 /* Turn off any padding. We'll do it later after we've composed
1233 the numbers without padding. */
1234 tmp_format.fill_char = '\0';
1235 tmp_format.align = '<';
1236 tmp_format.width = -1;
1237
1238 /* Calculate how much memory we'll need. */
1239 n_re_total = calc_number_widths(&re_spec, 0, re_sign_char, re_unicode_tmp,
1240 i_re, i_re + n_re_digits, n_re_remainder,
Victor Stinner41a863c2012-02-24 00:37:51 +01001241 re_has_decimal, &locale, &tmp_format,
1242 &maxchar);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001243
1244 /* Same formatting, but always include a sign, unless the real part is
1245 * going to be omitted, in which case we use whatever sign convention was
1246 * requested by the original format. */
1247 if (!skip_re)
1248 tmp_format.sign = '+';
1249 n_im_total = calc_number_widths(&im_spec, 0, im_sign_char, im_unicode_tmp,
1250 i_im, i_im + n_im_digits, n_im_remainder,
Victor Stinner41a863c2012-02-24 00:37:51 +01001251 im_has_decimal, &locale, &tmp_format,
1252 &maxchar);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001253
1254 if (skip_re)
1255 n_re_total = 0;
1256
1257 /* Add 1 for the 'j', and optionally 2 for parens. */
1258 calc_padding(n_re_total + n_im_total + 1 + add_parens * 2,
1259 format->width, format->align, &lpad, &rpad, &total);
1260
Victor Stinner41a863c2012-02-24 00:37:51 +01001261 if (lpad || rpad)
Victor Stinnera4ac6002012-01-21 15:50:49 +01001262 maxchar = Py_MAX(maxchar, format->fill_char);
1263
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001264 result = PyUnicode_New(total, maxchar);
1265 if (result == NULL)
1266 goto done;
1267 rkind = PyUnicode_KIND(result);
1268 rdata = PyUnicode_DATA(result);
1269
1270 /* Populate the memory. First, the padding. */
1271 index = fill_padding(result, 0,
1272 n_re_total + n_im_total + 1 + add_parens * 2,
1273 format->fill_char=='\0' ? ' ' : format->fill_char,
1274 lpad, rpad);
1275
1276 if (add_parens)
1277 PyUnicode_WRITE(rkind, rdata, index++, '(');
1278
1279 if (!skip_re) {
Victor Stinnerafbaa202011-09-28 21:50:16 +02001280 err = fill_number(result, index, &re_spec,
1281 re_unicode_tmp, i_re, i_re + n_re_digits,
1282 NULL, 0,
1283 0,
1284 &locale, 0);
1285 if (err) {
1286 Py_CLEAR(result);
1287 goto done;
1288 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001289 index += n_re_total;
1290 }
Victor Stinnerafbaa202011-09-28 21:50:16 +02001291 err = fill_number(result, index, &im_spec,
1292 im_unicode_tmp, i_im, i_im + n_im_digits,
1293 NULL, 0,
1294 0,
1295 &locale, 0);
1296 if (err) {
1297 Py_CLEAR(result);
1298 goto done;
1299 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001300 index += n_im_total;
1301 PyUnicode_WRITE(rkind, rdata, index++, 'j');
1302
1303 if (add_parens)
1304 PyUnicode_WRITE(rkind, rdata, index++, ')');
1305
1306done:
1307 PyMem_Free(re_buf);
1308 PyMem_Free(im_buf);
1309 Py_XDECREF(re_unicode_tmp);
1310 Py_XDECREF(im_unicode_tmp);
Victor Stinner41a863c2012-02-24 00:37:51 +01001311 free_locale_info(&locale);
Victor Stinnered277852012-02-01 00:22:23 +01001312 assert(!result || _PyUnicode_CheckConsistency(result, 1));
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001313 return result;
1314}
1315
1316/************************************************************************/
1317/*********** built in formatters ****************************************/
1318/************************************************************************/
1319PyObject *
1320_PyUnicode_FormatAdvanced(PyObject *obj,
1321 PyObject *format_spec,
1322 Py_ssize_t start, Py_ssize_t end)
1323{
1324 InternalFormatSpec format;
Victor Stinnerfb9ea8c2011-10-06 01:45:57 +02001325 PyObject *result;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001326
1327 /* check for the special case of zero length format spec, make
1328 it equivalent to str(obj) */
Victor Stinnerfb9ea8c2011-10-06 01:45:57 +02001329 if (start == end)
1330 return PyObject_Str(obj);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001331
1332 /* parse the format_spec */
1333 if (!parse_internal_render_format_spec(format_spec, start, end,
1334 &format, 's', '<'))
Victor Stinnerfb9ea8c2011-10-06 01:45:57 +02001335 return NULL;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001336
1337 /* type conversion? */
1338 switch (format.type) {
1339 case 's':
1340 /* no type conversion needed, already a string. do the formatting */
1341 result = format_string_internal(obj, &format);
Victor Stinnerfb9ea8c2011-10-06 01:45:57 +02001342 if (result != NULL)
1343 assert(_PyUnicode_CheckConsistency(result, 1));
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001344 break;
1345 default:
1346 /* unknown */
1347 unknown_presentation_type(format.type, obj->ob_type->tp_name);
Victor Stinnerfb9ea8c2011-10-06 01:45:57 +02001348 result = NULL;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001349 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001350 return result;
1351}
1352
1353static PyObject*
1354format_int_or_long(PyObject* obj, PyObject* format_spec,
1355 Py_ssize_t start, Py_ssize_t end,
1356 IntOrLongToString tostring)
1357{
1358 PyObject *result = NULL;
1359 PyObject *tmp = NULL;
1360 InternalFormatSpec format;
1361
1362 /* check for the special case of zero length format spec, make
1363 it equivalent to str(obj) */
1364 if (start == end) {
1365 result = PyObject_Str(obj);
1366 goto done;
1367 }
1368
1369 /* parse the format_spec */
1370 if (!parse_internal_render_format_spec(format_spec, start, end,
1371 &format, 'd', '>'))
1372 goto done;
1373
1374 /* type conversion? */
1375 switch (format.type) {
1376 case 'b':
1377 case 'c':
1378 case 'd':
1379 case 'o':
1380 case 'x':
1381 case 'X':
1382 case 'n':
1383 /* no type conversion needed, already an int (or long). do
1384 the formatting */
1385 result = format_int_or_long_internal(obj, &format, tostring);
1386 break;
1387
1388 case 'e':
1389 case 'E':
1390 case 'f':
1391 case 'F':
1392 case 'g':
1393 case 'G':
1394 case '%':
1395 /* convert to float */
1396 tmp = PyNumber_Float(obj);
1397 if (tmp == NULL)
1398 goto done;
1399 result = format_float_internal(tmp, &format);
1400 break;
1401
1402 default:
1403 /* unknown */
1404 unknown_presentation_type(format.type, obj->ob_type->tp_name);
1405 goto done;
1406 }
1407
1408done:
1409 Py_XDECREF(tmp);
1410 return result;
1411}
1412
1413/* Need to define long_format as a function that will convert a long
1414 to a string. In 3.0, _PyLong_Format has the correct signature. */
1415#define long_format _PyLong_Format
1416
1417PyObject *
1418_PyLong_FormatAdvanced(PyObject *obj,
1419 PyObject *format_spec,
1420 Py_ssize_t start, Py_ssize_t end)
1421{
1422 return format_int_or_long(obj, format_spec, start, end,
1423 long_format);
1424}
1425
1426PyObject *
1427_PyFloat_FormatAdvanced(PyObject *obj,
1428 PyObject *format_spec,
1429 Py_ssize_t start, Py_ssize_t end)
1430{
1431 PyObject *result = NULL;
1432 InternalFormatSpec format;
1433
1434 /* check for the special case of zero length format spec, make
1435 it equivalent to str(obj) */
1436 if (start == end) {
1437 result = PyObject_Str(obj);
1438 goto done;
1439 }
1440
1441 /* parse the format_spec */
1442 if (!parse_internal_render_format_spec(format_spec, start, end,
1443 &format, '\0', '>'))
1444 goto done;
1445
1446 /* type conversion? */
1447 switch (format.type) {
1448 case '\0': /* No format code: like 'g', but with at least one decimal. */
1449 case 'e':
1450 case 'E':
1451 case 'f':
1452 case 'F':
1453 case 'g':
1454 case 'G':
1455 case 'n':
1456 case '%':
1457 /* no conversion, already a float. do the formatting */
1458 result = format_float_internal(obj, &format);
1459 break;
1460
1461 default:
1462 /* unknown */
1463 unknown_presentation_type(format.type, obj->ob_type->tp_name);
1464 goto done;
1465 }
1466
1467done:
1468 return result;
1469}
1470
1471PyObject *
1472_PyComplex_FormatAdvanced(PyObject *obj,
1473 PyObject *format_spec,
1474 Py_ssize_t start, Py_ssize_t end)
1475{
1476 PyObject *result = NULL;
1477 InternalFormatSpec format;
1478
1479 /* check for the special case of zero length format spec, make
1480 it equivalent to str(obj) */
1481 if (start == end) {
1482 result = PyObject_Str(obj);
1483 goto done;
1484 }
1485
1486 /* parse the format_spec */
1487 if (!parse_internal_render_format_spec(format_spec, start, end,
1488 &format, '\0', '>'))
1489 goto done;
1490
1491 /* type conversion? */
1492 switch (format.type) {
1493 case '\0': /* No format code: like 'g', but with at least one decimal. */
1494 case 'e':
1495 case 'E':
1496 case 'f':
1497 case 'F':
1498 case 'g':
1499 case 'G':
1500 case 'n':
1501 /* no conversion, already a complex. do the formatting */
1502 result = format_complex_internal(obj, &format);
1503 break;
1504
1505 default:
1506 /* unknown */
1507 unknown_presentation_type(format.type, obj->ob_type->tp_name);
1508 goto done;
1509 }
1510
1511done:
1512 return result;
1513}