blob: 58e66e0d90c5b42c8995f6b5b7e30dcbadbd12c4 [file] [log] [blame]
/* implements the unicode (as opposed to string) version of the
   built-in formatters for string, int, float.  that is, the versions
   of int.__format__, etc., that take and return unicode objects */
4
5#include "Python.h"
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02006#include <locale.h>
7
8/* Raises an exception about an unknown presentation type for this
9 * type. */
10
11static void
12unknown_presentation_type(Py_UCS4 presentation_type,
13 const char* type_name)
14{
15 /* %c might be out-of-range, hence the two cases. */
16 if (presentation_type > 32 && presentation_type < 128)
17 PyErr_Format(PyExc_ValueError,
18 "Unknown format code '%c' "
19 "for object of type '%.200s'",
20 (char)presentation_type,
21 type_name);
22 else
23 PyErr_Format(PyExc_ValueError,
24 "Unknown format code '\\x%x' "
25 "for object of type '%.200s'",
26 (unsigned int)presentation_type,
27 type_name);
28}
29
30static void
31invalid_comma_type(Py_UCS4 presentation_type)
32{
33 if (presentation_type > 32 && presentation_type < 128)
34 PyErr_Format(PyExc_ValueError,
35 "Cannot specify ',' with '%c'.",
36 (char)presentation_type);
37 else
38 PyErr_Format(PyExc_ValueError,
39 "Cannot specify ',' with '\\x%x'.",
40 (unsigned int)presentation_type);
41}
42
43/*
44 get_integer consumes 0 or more decimal digit characters from an
45 input string, updates *result with the corresponding positive
46 integer, and returns the number of digits consumed.
47
48 returns -1 on error.
49*/
50static int
51get_integer(PyObject *str, Py_ssize_t *pos, Py_ssize_t end,
52 Py_ssize_t *result)
53{
Mark Dickinson47862d42011-12-01 15:27:04 +000054 Py_ssize_t accumulator, digitval;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020055 int numdigits;
56 accumulator = numdigits = 0;
57 for (;;(*pos)++, numdigits++) {
58 if (*pos >= end)
59 break;
60 digitval = Py_UNICODE_TODECIMAL(PyUnicode_READ_CHAR(str, *pos));
61 if (digitval < 0)
62 break;
63 /*
Mark Dickinson47862d42011-12-01 15:27:04 +000064 Detect possible overflow before it happens:
65
66 accumulator * 10 + digitval > PY_SSIZE_T_MAX if and only if
67 accumulator > (PY_SSIZE_T_MAX - digitval) / 10.
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020068 */
Mark Dickinson47862d42011-12-01 15:27:04 +000069 if (accumulator > (PY_SSIZE_T_MAX - digitval) / 10) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020070 PyErr_Format(PyExc_ValueError,
71 "Too many decimal digits in format string");
72 return -1;
73 }
Mark Dickinson47862d42011-12-01 15:27:04 +000074 accumulator = accumulator * 10 + digitval;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020075 }
76 *result = accumulator;
77 return numdigits;
78}
79
80/************************************************************************/
81/*********** standard format specifier parsing **************************/
82/************************************************************************/
83
84/* returns true if this character is a specifier alignment token */
85Py_LOCAL_INLINE(int)
86is_alignment_token(Py_UCS4 c)
87{
88 switch (c) {
89 case '<': case '>': case '=': case '^':
90 return 1;
91 default:
92 return 0;
93 }
94}
95
96/* returns true if this character is a sign element */
97Py_LOCAL_INLINE(int)
98is_sign_element(Py_UCS4 c)
99{
100 switch (c) {
101 case ' ': case '+': case '-':
102 return 1;
103 default:
104 return 0;
105 }
106}
Eric Smith8c663262007-08-25 02:26:07 +0000107
Eric Smith4a7d76d2008-05-30 18:10:19 +0000108
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200109typedef struct {
110 Py_UCS4 fill_char;
111 Py_UCS4 align;
112 int alternate;
113 Py_UCS4 sign;
114 Py_ssize_t width;
115 int thousands_separators;
116 Py_ssize_t precision;
117 Py_UCS4 type;
118} InternalFormatSpec;
Eric Smith4a7d76d2008-05-30 18:10:19 +0000119
#if 0
/* Occasionally useful for debugging.  Should normally be compiled out.
   Dumps every field of a parsed format specifier to stdout, one field
   per line, followed by an empty line. */
static void
DEBUG_PRINT_FORMAT_SPEC(InternalFormatSpec *format)
{
    const InternalFormatSpec *f = format;

    printf("internal format spec: fill_char %d\n", f->fill_char);
    printf("internal format spec: align %d\n", f->align);
    printf("internal format spec: alternate %d\n", f->alternate);
    printf("internal format spec: sign %d\n", f->sign);
    printf("internal format spec: width %zd\n", f->width);
    printf("internal format spec: thousands_separators %d\n",
           f->thousands_separators);
    printf("internal format spec: precision %zd\n", f->precision);
    printf("internal format spec: type %c\n", f->type);
    putchar('\n');
}
#endif
137
138
139/*
140 ptr points to the start of the format_spec, end points just past its end.
141 fills in format with the parsed information.
142 returns 1 on success, 0 on failure.
143 if failure, sets the exception
144*/
145static int
146parse_internal_render_format_spec(PyObject *format_spec,
147 Py_ssize_t start, Py_ssize_t end,
148 InternalFormatSpec *format,
149 char default_type,
150 char default_align)
151{
152 Py_ssize_t pos = start;
153 /* end-pos is used throughout this code to specify the length of
154 the input string */
155#define READ_spec(index) PyUnicode_READ_CHAR(format_spec, index)
156
157 Py_ssize_t consumed;
158 int align_specified = 0;
159
160 format->fill_char = '\0';
161 format->align = default_align;
162 format->alternate = 0;
163 format->sign = '\0';
164 format->width = -1;
165 format->thousands_separators = 0;
166 format->precision = -1;
167 format->type = default_type;
168
169 /* If the second char is an alignment token,
170 then parse the fill char */
171 if (end-pos >= 2 && is_alignment_token(READ_spec(pos+1))) {
172 format->align = READ_spec(pos+1);
173 format->fill_char = READ_spec(pos);
174 align_specified = 1;
175 pos += 2;
176 }
177 else if (end-pos >= 1 && is_alignment_token(READ_spec(pos))) {
178 format->align = READ_spec(pos);
179 align_specified = 1;
180 ++pos;
181 }
182
183 /* Parse the various sign options */
184 if (end-pos >= 1 && is_sign_element(READ_spec(pos))) {
185 format->sign = READ_spec(pos);
186 ++pos;
187 }
188
189 /* If the next character is #, we're in alternate mode. This only
190 applies to integers. */
191 if (end-pos >= 1 && READ_spec(pos) == '#') {
192 format->alternate = 1;
193 ++pos;
194 }
195
196 /* The special case for 0-padding (backwards compat) */
197 if (format->fill_char == '\0' && end-pos >= 1 && READ_spec(pos) == '0') {
198 format->fill_char = '0';
199 if (!align_specified) {
200 format->align = '=';
201 }
202 ++pos;
203 }
204
205 consumed = get_integer(format_spec, &pos, end, &format->width);
206 if (consumed == -1)
207 /* Overflow error. Exception already set. */
208 return 0;
209
210 /* If consumed is 0, we didn't consume any characters for the
211 width. In that case, reset the width to -1, because
212 get_integer() will have set it to zero. -1 is how we record
213 that the width wasn't specified. */
214 if (consumed == 0)
215 format->width = -1;
216
217 /* Comma signifies add thousands separators */
218 if (end-pos && READ_spec(pos) == ',') {
219 format->thousands_separators = 1;
220 ++pos;
221 }
222
223 /* Parse field precision */
224 if (end-pos && READ_spec(pos) == '.') {
225 ++pos;
226
227 consumed = get_integer(format_spec, &pos, end, &format->precision);
228 if (consumed == -1)
229 /* Overflow error. Exception already set. */
230 return 0;
231
232 /* Not having a precision after a dot is an error. */
233 if (consumed == 0) {
234 PyErr_Format(PyExc_ValueError,
235 "Format specifier missing precision");
236 return 0;
237 }
238
239 }
240
241 /* Finally, parse the type field. */
242
243 if (end-pos > 1) {
Eric V. Smithd25cfe62012-01-19 20:04:28 -0500244 /* More than one char remain, invalid format specifier. */
245 PyErr_Format(PyExc_ValueError, "Invalid format specifier");
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200246 return 0;
247 }
248
249 if (end-pos == 1) {
250 format->type = READ_spec(pos);
251 ++pos;
252 }
253
254 /* Do as much validating as we can, just by looking at the format
255 specifier. Do not take into account what type of formatting
256 we're doing (int, float, string). */
257
258 if (format->thousands_separators) {
259 switch (format->type) {
260 case 'd':
261 case 'e':
262 case 'f':
263 case 'g':
264 case 'E':
265 case 'G':
266 case '%':
267 case 'F':
268 case '\0':
269 /* These are allowed. See PEP 378.*/
270 break;
271 default:
272 invalid_comma_type(format->type);
273 return 0;
274 }
275 }
276
Victor Stinnera4ac6002012-01-21 15:50:49 +0100277 assert (format->align <= 127);
278 assert (format->sign <= 127);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200279 return 1;
280}
281
282/* Calculate the padding needed. */
283static void
284calc_padding(Py_ssize_t nchars, Py_ssize_t width, Py_UCS4 align,
285 Py_ssize_t *n_lpadding, Py_ssize_t *n_rpadding,
286 Py_ssize_t *n_total)
287{
288 if (width >= 0) {
289 if (nchars > width)
290 *n_total = nchars;
291 else
292 *n_total = width;
293 }
294 else {
295 /* not specified, use all of the chars and no more */
296 *n_total = nchars;
297 }
298
299 /* Figure out how much leading space we need, based on the
300 aligning */
301 if (align == '>')
302 *n_lpadding = *n_total - nchars;
303 else if (align == '^')
304 *n_lpadding = (*n_total - nchars) / 2;
305 else if (align == '<' || align == '=')
306 *n_lpadding = 0;
307 else {
308 /* We should never have an unspecified alignment. */
309 *n_lpadding = 0;
310 assert(0);
311 }
312
313 *n_rpadding = *n_total - nchars - *n_lpadding;
314}
315
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200316/* Do the padding, and return a pointer to where the caller-supplied
317 content goes. */
318static Py_ssize_t
319fill_padding(PyObject *s, Py_ssize_t start, Py_ssize_t nchars,
320 Py_UCS4 fill_char, Py_ssize_t n_lpadding,
321 Py_ssize_t n_rpadding)
322{
323 /* Pad on left. */
324 if (n_lpadding)
Victor Stinner3fe55312012-01-04 00:33:50 +0100325 PyUnicode_Fill(s, start, start + n_lpadding, fill_char);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200326
327 /* Pad on right. */
328 if (n_rpadding)
Victor Stinner3fe55312012-01-04 00:33:50 +0100329 PyUnicode_Fill(s, start + nchars + n_lpadding,
330 start + nchars + n_lpadding + n_rpadding, fill_char);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200331
332 /* Pointer to the user content. */
333 return start + n_lpadding;
334}
335
336/************************************************************************/
337/*********** common routines for numeric formatting *********************/
338/************************************************************************/
339
340/* Locale type codes. */
341#define LT_CURRENT_LOCALE 0
342#define LT_DEFAULT_LOCALE 1
343#define LT_NO_LOCALE 2
344
345/* Locale info needed for formatting integers and the part of floats
346 before and including the decimal. Note that locales only support
347 8-bit chars, not unicode. */
348typedef struct {
Victor Stinner41a863c2012-02-24 00:37:51 +0100349 PyObject *decimal_point;
350 PyObject *thousands_sep;
351 const char *grouping;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200352} LocaleInfo;
353
Victor Stinner41a863c2012-02-24 00:37:51 +0100354#define STATIC_LOCALE_INFO_INIT {0, 0, 0}
355
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200356/* describes the layout for an integer, see the comment in
357 calc_number_widths() for details */
358typedef struct {
359 Py_ssize_t n_lpadding;
360 Py_ssize_t n_prefix;
361 Py_ssize_t n_spadding;
362 Py_ssize_t n_rpadding;
363 char sign;
364 Py_ssize_t n_sign; /* number of digits needed for sign (0/1) */
365 Py_ssize_t n_grouped_digits; /* Space taken up by the digits, including
366 any grouping chars. */
367 Py_ssize_t n_decimal; /* 0 if only an integer */
368 Py_ssize_t n_remainder; /* Digits in decimal and/or exponent part,
369 excluding the decimal itself, if
370 present. */
371
372 /* These 2 are not the widths of fields, but are needed by
373 STRINGLIB_GROUPING. */
374 Py_ssize_t n_digits; /* The number of digits before a decimal
375 or exponent. */
376 Py_ssize_t n_min_width; /* The min_width we used when we computed
377 the n_grouped_digits width. */
378} NumberFieldWidths;
379
380
381/* Given a number of the form:
382 digits[remainder]
383 where ptr points to the start and end points to the end, find where
384 the integer part ends. This could be a decimal, an exponent, both,
385 or neither.
386 If a decimal point is present, set *has_decimal and increment
387 remainder beyond it.
388 Results are undefined (but shouldn't crash) for improperly
389 formatted strings.
390*/
391static void
392parse_number(PyObject *s, Py_ssize_t pos, Py_ssize_t end,
393 Py_ssize_t *n_remainder, int *has_decimal)
394{
395 Py_ssize_t remainder;
396
397 while (pos<end && isdigit(PyUnicode_READ_CHAR(s, pos)))
398 ++pos;
399 remainder = pos;
400
401 /* Does remainder start with a decimal point? */
402 *has_decimal = pos<end && PyUnicode_READ_CHAR(s, remainder) == '.';
403
404 /* Skip the decimal point. */
405 if (*has_decimal)
406 remainder++;
407
408 *n_remainder = end - remainder;
409}
410
411/* not all fields of format are used. for example, precision is
412 unused. should this take discrete params in order to be more clear
413 about what it does? or is passing a single format parameter easier
414 and more efficient enough to justify a little obfuscation? */
415static Py_ssize_t
416calc_number_widths(NumberFieldWidths *spec, Py_ssize_t n_prefix,
417 Py_UCS4 sign_char, PyObject *number, Py_ssize_t n_start,
418 Py_ssize_t n_end, Py_ssize_t n_remainder,
419 int has_decimal, const LocaleInfo *locale,
Victor Stinner41a863c2012-02-24 00:37:51 +0100420 const InternalFormatSpec *format, Py_UCS4 *maxchar)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200421{
422 Py_ssize_t n_non_digit_non_padding;
423 Py_ssize_t n_padding;
424
425 spec->n_digits = n_end - n_start - n_remainder - (has_decimal?1:0);
426 spec->n_lpadding = 0;
427 spec->n_prefix = n_prefix;
Victor Stinner41a863c2012-02-24 00:37:51 +0100428 spec->n_decimal = has_decimal ? PyUnicode_GET_LENGTH(locale->decimal_point) : 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200429 spec->n_remainder = n_remainder;
430 spec->n_spadding = 0;
431 spec->n_rpadding = 0;
432 spec->sign = '\0';
433 spec->n_sign = 0;
434
435 /* the output will look like:
436 | |
437 | <lpadding> <sign> <prefix> <spadding> <grouped_digits> <decimal> <remainder> <rpadding> |
438 | |
439
440 sign is computed from format->sign and the actual
441 sign of the number
442
443 prefix is given (it's for the '0x' prefix)
444
445 digits is already known
446
447 the total width is either given, or computed from the
448 actual digits
449
450 only one of lpadding, spadding, and rpadding can be non-zero,
451 and it's calculated from the width and other fields
452 */
453
454 /* compute the various parts we're going to write */
455 switch (format->sign) {
456 case '+':
457 /* always put a + or - */
458 spec->n_sign = 1;
459 spec->sign = (sign_char == '-' ? '-' : '+');
460 break;
461 case ' ':
462 spec->n_sign = 1;
463 spec->sign = (sign_char == '-' ? '-' : ' ');
464 break;
465 default:
466 /* Not specified, or the default (-) */
467 if (sign_char == '-') {
468 spec->n_sign = 1;
469 spec->sign = '-';
470 }
471 }
472
473 /* The number of chars used for non-digits and non-padding. */
474 n_non_digit_non_padding = spec->n_sign + spec->n_prefix + spec->n_decimal +
475 spec->n_remainder;
476
477 /* min_width can go negative, that's okay. format->width == -1 means
478 we don't care. */
479 if (format->fill_char == '0' && format->align == '=')
480 spec->n_min_width = format->width - n_non_digit_non_padding;
481 else
482 spec->n_min_width = 0;
483
484 if (spec->n_digits == 0)
485 /* This case only occurs when using 'c' formatting, we need
486 to special case it because the grouping code always wants
487 to have at least one character. */
488 spec->n_grouped_digits = 0;
Victor Stinner41a863c2012-02-24 00:37:51 +0100489 else {
490 Py_UCS4 grouping_maxchar;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200491 spec->n_grouped_digits = _PyUnicode_InsertThousandsGrouping(
Victor Stinner41a863c2012-02-24 00:37:51 +0100492 NULL, 0,
493 0, NULL,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200494 spec->n_digits, spec->n_min_width,
Victor Stinner41a863c2012-02-24 00:37:51 +0100495 locale->grouping, locale->thousands_sep, &grouping_maxchar);
496 *maxchar = Py_MAX(*maxchar, grouping_maxchar);
497 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200498
499 /* Given the desired width and the total of digit and non-digit
500 space we consume, see if we need any padding. format->width can
501 be negative (meaning no padding), but this code still works in
502 that case. */
503 n_padding = format->width -
504 (n_non_digit_non_padding + spec->n_grouped_digits);
505 if (n_padding > 0) {
506 /* Some padding is needed. Determine if it's left, space, or right. */
507 switch (format->align) {
508 case '<':
509 spec->n_rpadding = n_padding;
510 break;
511 case '^':
512 spec->n_lpadding = n_padding / 2;
513 spec->n_rpadding = n_padding - spec->n_lpadding;
514 break;
515 case '=':
516 spec->n_spadding = n_padding;
517 break;
518 case '>':
519 spec->n_lpadding = n_padding;
520 break;
521 default:
522 /* Shouldn't get here, but treat it as '>' */
523 spec->n_lpadding = n_padding;
524 assert(0);
525 break;
526 }
527 }
Victor Stinner41a863c2012-02-24 00:37:51 +0100528
529 if (spec->n_lpadding || spec->n_spadding || spec->n_rpadding)
530 *maxchar = Py_MAX(*maxchar, format->fill_char);
531
Victor Stinner90f50d42012-02-24 01:44:47 +0100532 if (spec->n_decimal)
533 *maxchar = Py_MAX(*maxchar, PyUnicode_MAX_CHAR_VALUE(locale->decimal_point));
534
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200535 return spec->n_lpadding + spec->n_sign + spec->n_prefix +
536 spec->n_spadding + spec->n_grouped_digits + spec->n_decimal +
537 spec->n_remainder + spec->n_rpadding;
538}
539
540/* Fill in the digit parts of a numbers's string representation,
541 as determined in calc_number_widths().
Victor Stinnerafbaa202011-09-28 21:50:16 +0200542 Return -1 on error, or 0 on success. */
543static int
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200544fill_number(PyObject *out, Py_ssize_t pos, const NumberFieldWidths *spec,
545 PyObject *digits, Py_ssize_t d_start, Py_ssize_t d_end,
Victor Stinnerafbaa202011-09-28 21:50:16 +0200546 PyObject *prefix, Py_ssize_t p_start,
547 Py_UCS4 fill_char,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200548 LocaleInfo *locale, int toupper)
549{
550 /* Used to keep track of digits, decimal, and remainder. */
551 Py_ssize_t d_pos = d_start;
552 unsigned int kind = PyUnicode_KIND(out);
553 void *data = PyUnicode_DATA(out);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200554 Py_ssize_t r;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200555
556 if (spec->n_lpadding) {
Victor Stinner3fe55312012-01-04 00:33:50 +0100557 PyUnicode_Fill(out, pos, pos + spec->n_lpadding, fill_char);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200558 pos += spec->n_lpadding;
559 }
560 if (spec->n_sign == 1) {
561 PyUnicode_WRITE(kind, data, pos++, spec->sign);
562 }
563 if (spec->n_prefix) {
Victor Stinnerfd85c3a2011-09-28 21:53:49 +0200564 if (PyUnicode_CopyCharacters(out, pos,
565 prefix, p_start,
566 spec->n_prefix) < 0)
567 return -1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200568 if (toupper) {
569 Py_ssize_t t;
Benjamin Peterson21e0da22012-01-11 21:00:42 -0500570 for (t = 0; t < spec->n_prefix; t++) {
571 Py_UCS4 c = PyUnicode_READ(kind, data, pos + t);
Victor Stinnered277852012-02-01 00:22:23 +0100572 c = Py_TOUPPER(c);
Victor Stinnera4ac6002012-01-21 15:50:49 +0100573 assert (c <= 127);
Victor Stinnered277852012-02-01 00:22:23 +0100574 PyUnicode_WRITE(kind, data, pos + t, c);
Benjamin Peterson21e0da22012-01-11 21:00:42 -0500575 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200576 }
577 pos += spec->n_prefix;
578 }
579 if (spec->n_spadding) {
Victor Stinner3fe55312012-01-04 00:33:50 +0100580 PyUnicode_Fill(out, pos, pos + spec->n_spadding, fill_char);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200581 pos += spec->n_spadding;
582 }
583
584 /* Only for type 'c' special case, it has no digits. */
585 if (spec->n_digits != 0) {
586 /* Fill the digits with InsertThousandsGrouping. */
Victor Stinnerdba2dee2011-09-28 21:50:42 +0200587 char *pdigits;
588 if (PyUnicode_READY(digits))
589 return -1;
590 pdigits = PyUnicode_DATA(digits);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200591 if (PyUnicode_KIND(digits) < kind) {
592 pdigits = _PyUnicode_AsKind(digits, kind);
Victor Stinnerafbaa202011-09-28 21:50:16 +0200593 if (pdigits == NULL)
594 return -1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200595 }
Victor Stinner90f50d42012-02-24 01:44:47 +0100596 r = _PyUnicode_InsertThousandsGrouping(
Victor Stinner41a863c2012-02-24 00:37:51 +0100597 out, pos,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200598 spec->n_grouped_digits,
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200599 pdigits + kind * d_pos,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200600 spec->n_digits, spec->n_min_width,
Victor Stinner41a863c2012-02-24 00:37:51 +0100601 locale->grouping, locale->thousands_sep, NULL);
Victor Stinner90f50d42012-02-24 01:44:47 +0100602 if (r == -1)
603 return -1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200604 assert(r == spec->n_grouped_digits);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200605 if (PyUnicode_KIND(digits) < kind)
606 PyMem_Free(pdigits);
607 d_pos += spec->n_digits;
608 }
609 if (toupper) {
610 Py_ssize_t t;
Benjamin Peterson21e0da22012-01-11 21:00:42 -0500611 for (t = 0; t < spec->n_grouped_digits; t++) {
612 Py_UCS4 c = PyUnicode_READ(kind, data, pos + t);
Victor Stinnered277852012-02-01 00:22:23 +0100613 c = Py_TOUPPER(c);
Benjamin Peterson21e0da22012-01-11 21:00:42 -0500614 if (c > 127) {
615 PyErr_SetString(PyExc_SystemError, "non-ascii grouped digit");
616 return -1;
617 }
Victor Stinnered277852012-02-01 00:22:23 +0100618 PyUnicode_WRITE(kind, data, pos + t, c);
Benjamin Peterson21e0da22012-01-11 21:00:42 -0500619 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200620 }
621 pos += spec->n_grouped_digits;
622
623 if (spec->n_decimal) {
Victor Stinner41a863c2012-02-24 00:37:51 +0100624 if (PyUnicode_CopyCharacters(out, pos, locale->decimal_point, 0, spec->n_decimal) < 0)
625 return -1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200626 pos += spec->n_decimal;
627 d_pos += 1;
628 }
629
630 if (spec->n_remainder) {
Victor Stinnerfd85c3a2011-09-28 21:53:49 +0200631 if (PyUnicode_CopyCharacters(out, pos, digits, d_pos, spec->n_remainder) < 0)
632 return -1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200633 pos += spec->n_remainder;
634 d_pos += spec->n_remainder;
635 }
636
637 if (spec->n_rpadding) {
Victor Stinner3fe55312012-01-04 00:33:50 +0100638 PyUnicode_Fill(out, pos, pos + spec->n_rpadding, fill_char);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200639 pos += spec->n_rpadding;
640 }
Victor Stinnerafbaa202011-09-28 21:50:16 +0200641 return 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200642}
643
644static char no_grouping[1] = {CHAR_MAX};
645
646/* Find the decimal point character(s?), thousands_separator(s?), and
647 grouping description, either for the current locale if type is
648 LT_CURRENT_LOCALE, a hard-coded locale if LT_DEFAULT_LOCALE, or
649 none if LT_NO_LOCALE. */
Victor Stinner41a863c2012-02-24 00:37:51 +0100650static int
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200651get_locale_info(int type, LocaleInfo *locale_info)
652{
653 switch (type) {
654 case LT_CURRENT_LOCALE: {
655 struct lconv *locale_data = localeconv();
Victor Stinner41a863c2012-02-24 00:37:51 +0100656 locale_info->decimal_point = PyUnicode_DecodeLocale(
657 locale_data->decimal_point,
658 NULL);
659 if (locale_info->decimal_point == NULL)
660 return -1;
661 locale_info->thousands_sep = PyUnicode_DecodeLocale(
662 locale_data->thousands_sep,
663 NULL);
664 if (locale_info->thousands_sep == NULL) {
665 Py_DECREF(locale_info->decimal_point);
666 return -1;
667 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200668 locale_info->grouping = locale_data->grouping;
669 break;
670 }
671 case LT_DEFAULT_LOCALE:
Victor Stinner41a863c2012-02-24 00:37:51 +0100672 locale_info->decimal_point = PyUnicode_FromOrdinal('.');
673 locale_info->thousands_sep = PyUnicode_FromOrdinal(',');
674 if (!locale_info->decimal_point || !locale_info->thousands_sep) {
675 Py_XDECREF(locale_info->decimal_point);
676 Py_XDECREF(locale_info->thousands_sep);
677 return -1;
678 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200679 locale_info->grouping = "\3"; /* Group every 3 characters. The
680 (implicit) trailing 0 means repeat
681 infinitely. */
682 break;
683 case LT_NO_LOCALE:
Victor Stinner41a863c2012-02-24 00:37:51 +0100684 locale_info->decimal_point = PyUnicode_FromOrdinal('.');
685 locale_info->thousands_sep = PyUnicode_New(0, 0);
686 if (!locale_info->decimal_point || !locale_info->thousands_sep) {
687 Py_XDECREF(locale_info->decimal_point);
688 Py_XDECREF(locale_info->thousands_sep);
689 return -1;
690 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200691 locale_info->grouping = no_grouping;
692 break;
693 default:
694 assert(0);
695 }
Victor Stinner41a863c2012-02-24 00:37:51 +0100696 return 0;
697}
698
699static void
700free_locale_info(LocaleInfo *locale_info)
701{
702 Py_XDECREF(locale_info->decimal_point);
703 Py_XDECREF(locale_info->thousands_sep);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200704}
705
706/************************************************************************/
707/*********** string formatting ******************************************/
708/************************************************************************/
709
710static PyObject *
711format_string_internal(PyObject *value, const InternalFormatSpec *format)
712{
713 Py_ssize_t lpad;
714 Py_ssize_t rpad;
715 Py_ssize_t total;
716 Py_ssize_t pos;
Victor Stinnerc4f281e2011-10-11 22:11:42 +0200717 Py_ssize_t len = PyUnicode_GET_LENGTH(value);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200718 PyObject *result = NULL;
Amaury Forgeot d'Arccd27df32012-01-23 22:42:19 +0100719 Py_UCS4 maxchar = 127;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200720
721 /* sign is not allowed on strings */
722 if (format->sign != '\0') {
723 PyErr_SetString(PyExc_ValueError,
724 "Sign not allowed in string format specifier");
725 goto done;
726 }
727
728 /* alternate is not allowed on strings */
729 if (format->alternate) {
730 PyErr_SetString(PyExc_ValueError,
731 "Alternate form (#) not allowed in string format "
732 "specifier");
733 goto done;
734 }
735
736 /* '=' alignment not allowed on strings */
737 if (format->align == '=') {
738 PyErr_SetString(PyExc_ValueError,
739 "'=' alignment not allowed "
740 "in string format specifier");
741 goto done;
742 }
743
744 /* if precision is specified, output no more that format.precision
745 characters */
746 if (format->precision >= 0 && len >= format->precision) {
747 len = format->precision;
748 }
749
750 calc_padding(len, format->width, format->align, &lpad, &rpad, &total);
751
Victor Stinnera4ac6002012-01-21 15:50:49 +0100752 if (lpad != 0 || rpad != 0)
753 maxchar = Py_MAX(maxchar, format->fill_char);
754
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200755 /* allocate the resulting string */
756 result = PyUnicode_New(total, maxchar);
757 if (result == NULL)
758 goto done;
759
760 /* Write into that space. First the padding. */
761 pos = fill_padding(result, 0, len,
762 format->fill_char=='\0'?' ':format->fill_char,
763 lpad, rpad);
764
765 /* Then the source string. */
Victor Stinnerfd85c3a2011-09-28 21:53:49 +0200766 if (PyUnicode_CopyCharacters(result, pos, value, 0, len) < 0)
767 Py_CLEAR(result);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200768
769done:
Victor Stinnered277852012-02-01 00:22:23 +0100770 assert(!result || _PyUnicode_CheckConsistency(result, 1));
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200771 return result;
772}
773
774
775/************************************************************************/
776/*********** long formatting ********************************************/
777/************************************************************************/
778
779typedef PyObject*
780(*IntOrLongToString)(PyObject *value, int base);
781
782static PyObject *
783format_int_or_long_internal(PyObject *value, const InternalFormatSpec *format,
784 IntOrLongToString tostring)
785{
786 PyObject *result = NULL;
Amaury Forgeot d'Arccd27df32012-01-23 22:42:19 +0100787 Py_UCS4 maxchar = 127;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200788 PyObject *tmp = NULL;
789 Py_ssize_t inumeric_chars;
790 Py_UCS4 sign_char = '\0';
791 Py_ssize_t n_digits; /* count of digits need from the computed
792 string */
793 Py_ssize_t n_remainder = 0; /* Used only for 'c' formatting, which
794 produces non-digits */
795 Py_ssize_t n_prefix = 0; /* Count of prefix chars, (e.g., '0x') */
796 Py_ssize_t n_total;
Victor Stinnered277852012-02-01 00:22:23 +0100797 Py_ssize_t prefix = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200798 NumberFieldWidths spec;
799 long x;
Victor Stinnerafbaa202011-09-28 21:50:16 +0200800 int err;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200801
802 /* Locale settings, either from the actual locale or
803 from a hard-code pseudo-locale */
Victor Stinner41a863c2012-02-24 00:37:51 +0100804 LocaleInfo locale = STATIC_LOCALE_INFO_INIT;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200805
806 /* no precision allowed on integers */
807 if (format->precision != -1) {
808 PyErr_SetString(PyExc_ValueError,
809 "Precision not allowed in integer format specifier");
810 goto done;
811 }
812
813 /* special case for character formatting */
814 if (format->type == 'c') {
815 /* error to specify a sign */
816 if (format->sign != '\0') {
817 PyErr_SetString(PyExc_ValueError,
818 "Sign not allowed with integer"
819 " format specifier 'c'");
820 goto done;
821 }
822
823 /* taken from unicodeobject.c formatchar() */
824 /* Integer input truncated to a character */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200825 x = PyLong_AsLong(value);
826 if (x == -1 && PyErr_Occurred())
827 goto done;
828 if (x < 0 || x > 0x10ffff) {
829 PyErr_SetString(PyExc_OverflowError,
Victor Stinnera4ac6002012-01-21 15:50:49 +0100830 "%c arg not in range(0x110000)");
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200831 goto done;
832 }
833 tmp = PyUnicode_FromOrdinal(x);
834 inumeric_chars = 0;
835 n_digits = 1;
Amaury Forgeot d'Arc6d766fc2012-01-23 23:20:43 +0100836 maxchar = Py_MAX(maxchar, (Py_UCS4)x);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200837
838 /* As a sort-of hack, we tell calc_number_widths that we only
839 have "remainder" characters. calc_number_widths thinks
840 these are characters that don't get formatted, only copied
841 into the output string. We do this for 'c' formatting,
842 because the characters are likely to be non-digits. */
843 n_remainder = 1;
844 }
845 else {
846 int base;
847 int leading_chars_to_skip = 0; /* Number of characters added by
848 PyNumber_ToBase that we want to
849 skip over. */
850
851 /* Compute the base and how many characters will be added by
852 PyNumber_ToBase */
853 switch (format->type) {
854 case 'b':
855 base = 2;
856 leading_chars_to_skip = 2; /* 0b */
857 break;
858 case 'o':
859 base = 8;
860 leading_chars_to_skip = 2; /* 0o */
861 break;
862 case 'x':
863 case 'X':
864 base = 16;
865 leading_chars_to_skip = 2; /* 0x */
866 break;
867 default: /* shouldn't be needed, but stops a compiler warning */
868 case 'd':
869 case 'n':
870 base = 10;
871 break;
872 }
873
874 /* The number of prefix chars is the same as the leading
875 chars to skip */
876 if (format->alternate)
877 n_prefix = leading_chars_to_skip;
878
879 /* Do the hard part, converting to a string in a given base */
880 tmp = tostring(value, base);
881 if (tmp == NULL || PyUnicode_READY(tmp) == -1)
882 goto done;
883
884 inumeric_chars = 0;
885 n_digits = PyUnicode_GET_LENGTH(tmp);
886
887 prefix = inumeric_chars;
888
889 /* Is a sign character present in the output? If so, remember it
890 and skip it */
891 if (PyUnicode_READ_CHAR(tmp, inumeric_chars) == '-') {
892 sign_char = '-';
893 ++prefix;
894 ++leading_chars_to_skip;
895 }
896
897 /* Skip over the leading chars (0x, 0b, etc.) */
898 n_digits -= leading_chars_to_skip;
899 inumeric_chars += leading_chars_to_skip;
900 }
901
902 /* Determine the grouping, separator, and decimal point, if any. */
Victor Stinner41a863c2012-02-24 00:37:51 +0100903 if (get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
904 (format->thousands_separators ?
905 LT_DEFAULT_LOCALE :
906 LT_NO_LOCALE),
907 &locale) == -1)
908 goto done;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200909
910 /* Calculate how much memory we'll need. */
911 n_total = calc_number_widths(&spec, n_prefix, sign_char, tmp, inumeric_chars,
Victor Stinner41a863c2012-02-24 00:37:51 +0100912 inumeric_chars + n_digits, n_remainder, 0,
913 &locale, format, &maxchar);
Victor Stinnera4ac6002012-01-21 15:50:49 +0100914
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200915 /* Allocate the memory. */
916 result = PyUnicode_New(n_total, maxchar);
917 if (!result)
918 goto done;
919
920 /* Populate the memory. */
Victor Stinnerafbaa202011-09-28 21:50:16 +0200921 err = fill_number(result, 0, &spec,
922 tmp, inumeric_chars, inumeric_chars + n_digits,
923 tmp, prefix,
924 format->fill_char == '\0' ? ' ' : format->fill_char,
925 &locale, format->type == 'X');
926 if (err)
927 Py_CLEAR(result);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200928
929done:
930 Py_XDECREF(tmp);
Victor Stinner41a863c2012-02-24 00:37:51 +0100931 free_locale_info(&locale);
Victor Stinnered277852012-02-01 00:22:23 +0100932 assert(!result || _PyUnicode_CheckConsistency(result, 1));
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200933 return result;
934}
935
/************************************************************************/
/*********** float formatting *******************************************/
/************************************************************************/
939
940static PyObject*
941strtounicode(char *charbuffer, Py_ssize_t len)
942{
943 return PyUnicode_FromKindAndData(PyUnicode_1BYTE_KIND, charbuffer, len);
944}
945
946/* much of this is taken from unicodeobject.c */
947static PyObject *
948format_float_internal(PyObject *value,
949 const InternalFormatSpec *format)
950{
951 char *buf = NULL; /* buffer returned from PyOS_double_to_string */
952 Py_ssize_t n_digits;
953 Py_ssize_t n_remainder;
954 Py_ssize_t n_total;
955 int has_decimal;
956 double val;
957 Py_ssize_t precision = format->precision;
958 Py_ssize_t default_precision = 6;
959 Py_UCS4 type = format->type;
960 int add_pct = 0;
961 Py_ssize_t index;
962 NumberFieldWidths spec;
963 int flags = 0;
964 PyObject *result = NULL;
Amaury Forgeot d'Arccd27df32012-01-23 22:42:19 +0100965 Py_UCS4 maxchar = 127;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200966 Py_UCS4 sign_char = '\0';
967 int float_type; /* Used to see if we have a nan, inf, or regular float. */
968 PyObject *unicode_tmp = NULL;
Victor Stinnerafbaa202011-09-28 21:50:16 +0200969 int err;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200970
971 /* Locale settings, either from the actual locale or
972 from a hard-code pseudo-locale */
Victor Stinner41a863c2012-02-24 00:37:51 +0100973 LocaleInfo locale = STATIC_LOCALE_INFO_INIT;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200974
975 if (format->alternate)
976 flags |= Py_DTSF_ALT;
977
978 if (type == '\0') {
979 /* Omitted type specifier. Behaves in the same way as repr(x)
980 and str(x) if no precision is given, else like 'g', but with
981 at least one digit after the decimal point. */
982 flags |= Py_DTSF_ADD_DOT_0;
983 type = 'r';
984 default_precision = 0;
985 }
986
987 if (type == 'n')
988 /* 'n' is the same as 'g', except for the locale used to
989 format the result. We take care of that later. */
990 type = 'g';
991
992 val = PyFloat_AsDouble(value);
993 if (val == -1.0 && PyErr_Occurred())
994 goto done;
995
996 if (type == '%') {
997 type = 'f';
998 val *= 100;
999 add_pct = 1;
1000 }
1001
1002 if (precision < 0)
1003 precision = default_precision;
1004 else if (type == 'r')
1005 type = 'g';
1006
1007 /* Cast "type", because if we're in unicode we need to pass a
1008 8-bit char. This is safe, because we've restricted what "type"
1009 can be. */
1010 buf = PyOS_double_to_string(val, (char)type, precision, flags,
1011 &float_type);
1012 if (buf == NULL)
1013 goto done;
1014 n_digits = strlen(buf);
1015
1016 if (add_pct) {
1017 /* We know that buf has a trailing zero (since we just called
1018 strlen() on it), and we don't use that fact any more. So we
1019 can just write over the trailing zero. */
1020 buf[n_digits] = '%';
1021 n_digits += 1;
1022 }
1023
1024 /* Since there is no unicode version of PyOS_double_to_string,
1025 just use the 8 bit version and then convert to unicode. */
1026 unicode_tmp = strtounicode(buf, n_digits);
1027 if (unicode_tmp == NULL)
1028 goto done;
1029 index = 0;
1030
1031 /* Is a sign character present in the output? If so, remember it
1032 and skip it */
1033 if (PyUnicode_READ_CHAR(unicode_tmp, index) == '-') {
1034 sign_char = '-';
1035 ++index;
1036 --n_digits;
1037 }
1038
1039 /* Determine if we have any "remainder" (after the digits, might include
1040 decimal or exponent or both (or neither)) */
1041 parse_number(unicode_tmp, index, index + n_digits, &n_remainder, &has_decimal);
1042
1043 /* Determine the grouping, separator, and decimal point, if any. */
Victor Stinner41a863c2012-02-24 00:37:51 +01001044 if (get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
1045 (format->thousands_separators ?
1046 LT_DEFAULT_LOCALE :
1047 LT_NO_LOCALE),
1048 &locale) == -1)
1049 goto done;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001050
1051 /* Calculate how much memory we'll need. */
Victor Stinnerafbaa202011-09-28 21:50:16 +02001052 n_total = calc_number_widths(&spec, 0, sign_char, unicode_tmp, index,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001053 index + n_digits, n_remainder, has_decimal,
Victor Stinner41a863c2012-02-24 00:37:51 +01001054 &locale, format, &maxchar);
Victor Stinnera4ac6002012-01-21 15:50:49 +01001055
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001056 /* Allocate the memory. */
1057 result = PyUnicode_New(n_total, maxchar);
1058 if (result == NULL)
1059 goto done;
1060
1061 /* Populate the memory. */
Victor Stinnerafbaa202011-09-28 21:50:16 +02001062 err = fill_number(result, 0, &spec,
1063 unicode_tmp, index, index + n_digits,
1064 NULL, 0,
1065 format->fill_char == '\0' ? ' ' : format->fill_char,
1066 &locale, 0);
1067 if (err)
1068 Py_CLEAR(result);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001069
1070done:
1071 PyMem_Free(buf);
1072 Py_DECREF(unicode_tmp);
Victor Stinner41a863c2012-02-24 00:37:51 +01001073 free_locale_info(&locale);
Victor Stinnered277852012-02-01 00:22:23 +01001074 assert(!result || _PyUnicode_CheckConsistency(result, 1));
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001075 return result;
1076}
1077
/************************************************************************/
/*********** complex formatting *****************************************/
/************************************************************************/
1081
1082static PyObject *
1083format_complex_internal(PyObject *value,
1084 const InternalFormatSpec *format)
1085{
1086 double re;
1087 double im;
1088 char *re_buf = NULL; /* buffer returned from PyOS_double_to_string */
1089 char *im_buf = NULL; /* buffer returned from PyOS_double_to_string */
1090
1091 InternalFormatSpec tmp_format = *format;
1092 Py_ssize_t n_re_digits;
1093 Py_ssize_t n_im_digits;
1094 Py_ssize_t n_re_remainder;
1095 Py_ssize_t n_im_remainder;
1096 Py_ssize_t n_re_total;
1097 Py_ssize_t n_im_total;
1098 int re_has_decimal;
1099 int im_has_decimal;
1100 Py_ssize_t precision = format->precision;
1101 Py_ssize_t default_precision = 6;
1102 Py_UCS4 type = format->type;
1103 Py_ssize_t i_re;
1104 Py_ssize_t i_im;
1105 NumberFieldWidths re_spec;
1106 NumberFieldWidths im_spec;
1107 int flags = 0;
1108 PyObject *result = NULL;
Amaury Forgeot d'Arccd27df32012-01-23 22:42:19 +01001109 Py_UCS4 maxchar = 127;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001110 int rkind;
1111 void *rdata;
1112 Py_ssize_t index;
1113 Py_UCS4 re_sign_char = '\0';
1114 Py_UCS4 im_sign_char = '\0';
1115 int re_float_type; /* Used to see if we have a nan, inf, or regular float. */
1116 int im_float_type;
1117 int add_parens = 0;
1118 int skip_re = 0;
1119 Py_ssize_t lpad;
1120 Py_ssize_t rpad;
1121 Py_ssize_t total;
1122 PyObject *re_unicode_tmp = NULL;
1123 PyObject *im_unicode_tmp = NULL;
Victor Stinnerafbaa202011-09-28 21:50:16 +02001124 int err;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001125
1126 /* Locale settings, either from the actual locale or
1127 from a hard-code pseudo-locale */
Victor Stinner41a863c2012-02-24 00:37:51 +01001128 LocaleInfo locale = STATIC_LOCALE_INFO_INIT;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001129
1130 /* Zero padding is not allowed. */
1131 if (format->fill_char == '0') {
1132 PyErr_SetString(PyExc_ValueError,
1133 "Zero padding is not allowed in complex format "
1134 "specifier");
1135 goto done;
1136 }
1137
1138 /* Neither is '=' alignment . */
1139 if (format->align == '=') {
1140 PyErr_SetString(PyExc_ValueError,
1141 "'=' alignment flag is not allowed in complex format "
1142 "specifier");
1143 goto done;
1144 }
1145
1146 re = PyComplex_RealAsDouble(value);
1147 if (re == -1.0 && PyErr_Occurred())
1148 goto done;
1149 im = PyComplex_ImagAsDouble(value);
1150 if (im == -1.0 && PyErr_Occurred())
1151 goto done;
1152
1153 if (format->alternate)
1154 flags |= Py_DTSF_ALT;
1155
1156 if (type == '\0') {
1157 /* Omitted type specifier. Should be like str(self). */
1158 type = 'r';
1159 default_precision = 0;
1160 if (re == 0.0 && copysign(1.0, re) == 1.0)
1161 skip_re = 1;
1162 else
1163 add_parens = 1;
1164 }
1165
1166 if (type == 'n')
1167 /* 'n' is the same as 'g', except for the locale used to
1168 format the result. We take care of that later. */
1169 type = 'g';
1170
1171 if (precision < 0)
1172 precision = default_precision;
1173 else if (type == 'r')
1174 type = 'g';
1175
1176 /* Cast "type", because if we're in unicode we need to pass a
1177 8-bit char. This is safe, because we've restricted what "type"
1178 can be. */
1179 re_buf = PyOS_double_to_string(re, (char)type, precision, flags,
1180 &re_float_type);
1181 if (re_buf == NULL)
1182 goto done;
1183 im_buf = PyOS_double_to_string(im, (char)type, precision, flags,
1184 &im_float_type);
1185 if (im_buf == NULL)
1186 goto done;
1187
1188 n_re_digits = strlen(re_buf);
1189 n_im_digits = strlen(im_buf);
1190
1191 /* Since there is no unicode version of PyOS_double_to_string,
1192 just use the 8 bit version and then convert to unicode. */
1193 re_unicode_tmp = strtounicode(re_buf, n_re_digits);
1194 if (re_unicode_tmp == NULL)
1195 goto done;
1196 i_re = 0;
1197
1198 im_unicode_tmp = strtounicode(im_buf, n_im_digits);
1199 if (im_unicode_tmp == NULL)
1200 goto done;
1201 i_im = 0;
1202
1203 /* Is a sign character present in the output? If so, remember it
1204 and skip it */
1205 if (PyUnicode_READ_CHAR(re_unicode_tmp, i_re) == '-') {
1206 re_sign_char = '-';
1207 ++i_re;
1208 --n_re_digits;
1209 }
1210 if (PyUnicode_READ_CHAR(im_unicode_tmp, i_im) == '-') {
1211 im_sign_char = '-';
1212 ++i_im;
1213 --n_im_digits;
1214 }
1215
1216 /* Determine if we have any "remainder" (after the digits, might include
1217 decimal or exponent or both (or neither)) */
Victor Stinnerafbaa202011-09-28 21:50:16 +02001218 parse_number(re_unicode_tmp, i_re, i_re + n_re_digits,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001219 &n_re_remainder, &re_has_decimal);
Victor Stinnerafbaa202011-09-28 21:50:16 +02001220 parse_number(im_unicode_tmp, i_im, i_im + n_im_digits,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001221 &n_im_remainder, &im_has_decimal);
1222
1223 /* Determine the grouping, separator, and decimal point, if any. */
Victor Stinner41a863c2012-02-24 00:37:51 +01001224 if (get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
1225 (format->thousands_separators ?
1226 LT_DEFAULT_LOCALE :
1227 LT_NO_LOCALE),
1228 &locale) == -1)
1229 goto done;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001230
1231 /* Turn off any padding. We'll do it later after we've composed
1232 the numbers without padding. */
1233 tmp_format.fill_char = '\0';
1234 tmp_format.align = '<';
1235 tmp_format.width = -1;
1236
1237 /* Calculate how much memory we'll need. */
1238 n_re_total = calc_number_widths(&re_spec, 0, re_sign_char, re_unicode_tmp,
1239 i_re, i_re + n_re_digits, n_re_remainder,
Victor Stinner41a863c2012-02-24 00:37:51 +01001240 re_has_decimal, &locale, &tmp_format,
1241 &maxchar);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001242
1243 /* Same formatting, but always include a sign, unless the real part is
1244 * going to be omitted, in which case we use whatever sign convention was
1245 * requested by the original format. */
1246 if (!skip_re)
1247 tmp_format.sign = '+';
1248 n_im_total = calc_number_widths(&im_spec, 0, im_sign_char, im_unicode_tmp,
1249 i_im, i_im + n_im_digits, n_im_remainder,
Victor Stinner41a863c2012-02-24 00:37:51 +01001250 im_has_decimal, &locale, &tmp_format,
1251 &maxchar);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001252
1253 if (skip_re)
1254 n_re_total = 0;
1255
1256 /* Add 1 for the 'j', and optionally 2 for parens. */
1257 calc_padding(n_re_total + n_im_total + 1 + add_parens * 2,
1258 format->width, format->align, &lpad, &rpad, &total);
1259
Victor Stinner41a863c2012-02-24 00:37:51 +01001260 if (lpad || rpad)
Victor Stinnera4ac6002012-01-21 15:50:49 +01001261 maxchar = Py_MAX(maxchar, format->fill_char);
1262
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001263 result = PyUnicode_New(total, maxchar);
1264 if (result == NULL)
1265 goto done;
1266 rkind = PyUnicode_KIND(result);
1267 rdata = PyUnicode_DATA(result);
1268
1269 /* Populate the memory. First, the padding. */
1270 index = fill_padding(result, 0,
1271 n_re_total + n_im_total + 1 + add_parens * 2,
1272 format->fill_char=='\0' ? ' ' : format->fill_char,
1273 lpad, rpad);
1274
1275 if (add_parens)
1276 PyUnicode_WRITE(rkind, rdata, index++, '(');
1277
1278 if (!skip_re) {
Victor Stinnerafbaa202011-09-28 21:50:16 +02001279 err = fill_number(result, index, &re_spec,
1280 re_unicode_tmp, i_re, i_re + n_re_digits,
1281 NULL, 0,
1282 0,
1283 &locale, 0);
1284 if (err) {
1285 Py_CLEAR(result);
1286 goto done;
1287 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001288 index += n_re_total;
1289 }
Victor Stinnerafbaa202011-09-28 21:50:16 +02001290 err = fill_number(result, index, &im_spec,
1291 im_unicode_tmp, i_im, i_im + n_im_digits,
1292 NULL, 0,
1293 0,
1294 &locale, 0);
1295 if (err) {
1296 Py_CLEAR(result);
1297 goto done;
1298 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001299 index += n_im_total;
1300 PyUnicode_WRITE(rkind, rdata, index++, 'j');
1301
1302 if (add_parens)
1303 PyUnicode_WRITE(rkind, rdata, index++, ')');
1304
1305done:
1306 PyMem_Free(re_buf);
1307 PyMem_Free(im_buf);
1308 Py_XDECREF(re_unicode_tmp);
1309 Py_XDECREF(im_unicode_tmp);
Victor Stinner41a863c2012-02-24 00:37:51 +01001310 free_locale_info(&locale);
Victor Stinnered277852012-02-01 00:22:23 +01001311 assert(!result || _PyUnicode_CheckConsistency(result, 1));
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001312 return result;
1313}
1314
/************************************************************************/
/*********** built in formatters ****************************************/
/************************************************************************/
1318PyObject *
1319_PyUnicode_FormatAdvanced(PyObject *obj,
1320 PyObject *format_spec,
1321 Py_ssize_t start, Py_ssize_t end)
1322{
1323 InternalFormatSpec format;
Victor Stinnerfb9ea8c2011-10-06 01:45:57 +02001324 PyObject *result;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001325
1326 /* check for the special case of zero length format spec, make
1327 it equivalent to str(obj) */
Victor Stinnerfb9ea8c2011-10-06 01:45:57 +02001328 if (start == end)
1329 return PyObject_Str(obj);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001330
1331 /* parse the format_spec */
1332 if (!parse_internal_render_format_spec(format_spec, start, end,
1333 &format, 's', '<'))
Victor Stinnerfb9ea8c2011-10-06 01:45:57 +02001334 return NULL;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001335
1336 /* type conversion? */
1337 switch (format.type) {
1338 case 's':
1339 /* no type conversion needed, already a string. do the formatting */
1340 result = format_string_internal(obj, &format);
Victor Stinnerfb9ea8c2011-10-06 01:45:57 +02001341 if (result != NULL)
1342 assert(_PyUnicode_CheckConsistency(result, 1));
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001343 break;
1344 default:
1345 /* unknown */
1346 unknown_presentation_type(format.type, obj->ob_type->tp_name);
Victor Stinnerfb9ea8c2011-10-06 01:45:57 +02001347 result = NULL;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001348 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001349 return result;
1350}
1351
1352static PyObject*
1353format_int_or_long(PyObject* obj, PyObject* format_spec,
1354 Py_ssize_t start, Py_ssize_t end,
1355 IntOrLongToString tostring)
1356{
1357 PyObject *result = NULL;
1358 PyObject *tmp = NULL;
1359 InternalFormatSpec format;
1360
1361 /* check for the special case of zero length format spec, make
1362 it equivalent to str(obj) */
1363 if (start == end) {
1364 result = PyObject_Str(obj);
1365 goto done;
1366 }
1367
1368 /* parse the format_spec */
1369 if (!parse_internal_render_format_spec(format_spec, start, end,
1370 &format, 'd', '>'))
1371 goto done;
1372
1373 /* type conversion? */
1374 switch (format.type) {
1375 case 'b':
1376 case 'c':
1377 case 'd':
1378 case 'o':
1379 case 'x':
1380 case 'X':
1381 case 'n':
1382 /* no type conversion needed, already an int (or long). do
1383 the formatting */
1384 result = format_int_or_long_internal(obj, &format, tostring);
1385 break;
1386
1387 case 'e':
1388 case 'E':
1389 case 'f':
1390 case 'F':
1391 case 'g':
1392 case 'G':
1393 case '%':
1394 /* convert to float */
1395 tmp = PyNumber_Float(obj);
1396 if (tmp == NULL)
1397 goto done;
1398 result = format_float_internal(tmp, &format);
1399 break;
1400
1401 default:
1402 /* unknown */
1403 unknown_presentation_type(format.type, obj->ob_type->tp_name);
1404 goto done;
1405 }
1406
1407done:
1408 Py_XDECREF(tmp);
1409 return result;
1410}
1411
1412/* Need to define long_format as a function that will convert a long
1413 to a string. In 3.0, _PyLong_Format has the correct signature. */
1414#define long_format _PyLong_Format
1415
1416PyObject *
1417_PyLong_FormatAdvanced(PyObject *obj,
1418 PyObject *format_spec,
1419 Py_ssize_t start, Py_ssize_t end)
1420{
1421 return format_int_or_long(obj, format_spec, start, end,
1422 long_format);
1423}
1424
1425PyObject *
1426_PyFloat_FormatAdvanced(PyObject *obj,
1427 PyObject *format_spec,
1428 Py_ssize_t start, Py_ssize_t end)
1429{
1430 PyObject *result = NULL;
1431 InternalFormatSpec format;
1432
1433 /* check for the special case of zero length format spec, make
1434 it equivalent to str(obj) */
1435 if (start == end) {
1436 result = PyObject_Str(obj);
1437 goto done;
1438 }
1439
1440 /* parse the format_spec */
1441 if (!parse_internal_render_format_spec(format_spec, start, end,
1442 &format, '\0', '>'))
1443 goto done;
1444
1445 /* type conversion? */
1446 switch (format.type) {
1447 case '\0': /* No format code: like 'g', but with at least one decimal. */
1448 case 'e':
1449 case 'E':
1450 case 'f':
1451 case 'F':
1452 case 'g':
1453 case 'G':
1454 case 'n':
1455 case '%':
1456 /* no conversion, already a float. do the formatting */
1457 result = format_float_internal(obj, &format);
1458 break;
1459
1460 default:
1461 /* unknown */
1462 unknown_presentation_type(format.type, obj->ob_type->tp_name);
1463 goto done;
1464 }
1465
1466done:
1467 return result;
1468}
1469
1470PyObject *
1471_PyComplex_FormatAdvanced(PyObject *obj,
1472 PyObject *format_spec,
1473 Py_ssize_t start, Py_ssize_t end)
1474{
1475 PyObject *result = NULL;
1476 InternalFormatSpec format;
1477
1478 /* check for the special case of zero length format spec, make
1479 it equivalent to str(obj) */
1480 if (start == end) {
1481 result = PyObject_Str(obj);
1482 goto done;
1483 }
1484
1485 /* parse the format_spec */
1486 if (!parse_internal_render_format_spec(format_spec, start, end,
1487 &format, '\0', '>'))
1488 goto done;
1489
1490 /* type conversion? */
1491 switch (format.type) {
1492 case '\0': /* No format code: like 'g', but with at least one decimal. */
1493 case 'e':
1494 case 'E':
1495 case 'f':
1496 case 'F':
1497 case 'g':
1498 case 'G':
1499 case 'n':
1500 /* no conversion, already a complex. do the formatting */
1501 result = format_complex_internal(obj, &format);
1502 break;
1503
1504 default:
1505 /* unknown */
1506 unknown_presentation_type(format.type, obj->ob_type->tp_name);
1507 goto done;
1508 }
1509
1510done:
1511 return result;
1512}