blob: 94f8047e18cf9b6d16f05eb0600052b7639e02ef [file] [log] [blame]
/* implements the unicode (as opposed to string) version of the
   built-in formatters for string, int, float.  that is, the versions
   of int.__format__, etc., that take and return unicode objects */
4
5#include "Python.h"
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02006#include <locale.h>
7
8/* Raises an exception about an unknown presentation type for this
9 * type. */
10
11static void
12unknown_presentation_type(Py_UCS4 presentation_type,
13 const char* type_name)
14{
15 /* %c might be out-of-range, hence the two cases. */
16 if (presentation_type > 32 && presentation_type < 128)
17 PyErr_Format(PyExc_ValueError,
18 "Unknown format code '%c' "
19 "for object of type '%.200s'",
20 (char)presentation_type,
21 type_name);
22 else
23 PyErr_Format(PyExc_ValueError,
24 "Unknown format code '\\x%x' "
25 "for object of type '%.200s'",
26 (unsigned int)presentation_type,
27 type_name);
28}
29
30static void
31invalid_comma_type(Py_UCS4 presentation_type)
32{
33 if (presentation_type > 32 && presentation_type < 128)
34 PyErr_Format(PyExc_ValueError,
35 "Cannot specify ',' with '%c'.",
36 (char)presentation_type);
37 else
38 PyErr_Format(PyExc_ValueError,
39 "Cannot specify ',' with '\\x%x'.",
40 (unsigned int)presentation_type);
41}
42
43/*
44 get_integer consumes 0 or more decimal digit characters from an
45 input string, updates *result with the corresponding positive
46 integer, and returns the number of digits consumed.
47
48 returns -1 on error.
49*/
50static int
51get_integer(PyObject *str, Py_ssize_t *pos, Py_ssize_t end,
52 Py_ssize_t *result)
53{
Mark Dickinson47862d42011-12-01 15:27:04 +000054 Py_ssize_t accumulator, digitval;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020055 int numdigits;
56 accumulator = numdigits = 0;
57 for (;;(*pos)++, numdigits++) {
58 if (*pos >= end)
59 break;
60 digitval = Py_UNICODE_TODECIMAL(PyUnicode_READ_CHAR(str, *pos));
61 if (digitval < 0)
62 break;
63 /*
Mark Dickinson47862d42011-12-01 15:27:04 +000064 Detect possible overflow before it happens:
65
66 accumulator * 10 + digitval > PY_SSIZE_T_MAX if and only if
67 accumulator > (PY_SSIZE_T_MAX - digitval) / 10.
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020068 */
Mark Dickinson47862d42011-12-01 15:27:04 +000069 if (accumulator > (PY_SSIZE_T_MAX - digitval) / 10) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020070 PyErr_Format(PyExc_ValueError,
71 "Too many decimal digits in format string");
72 return -1;
73 }
Mark Dickinson47862d42011-12-01 15:27:04 +000074 accumulator = accumulator * 10 + digitval;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020075 }
76 *result = accumulator;
77 return numdigits;
78}
79
80/************************************************************************/
81/*********** standard format specifier parsing **************************/
82/************************************************************************/
83
84/* returns true if this character is a specifier alignment token */
85Py_LOCAL_INLINE(int)
86is_alignment_token(Py_UCS4 c)
87{
88 switch (c) {
89 case '<': case '>': case '=': case '^':
90 return 1;
91 default:
92 return 0;
93 }
94}
95
96/* returns true if this character is a sign element */
97Py_LOCAL_INLINE(int)
98is_sign_element(Py_UCS4 c)
99{
100 switch (c) {
101 case ' ': case '+': case '-':
102 return 1;
103 default:
104 return 0;
105 }
106}
Eric Smith8c663262007-08-25 02:26:07 +0000107
Eric Smith4a7d76d2008-05-30 18:10:19 +0000108
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200109typedef struct {
110 Py_UCS4 fill_char;
111 Py_UCS4 align;
112 int alternate;
113 Py_UCS4 sign;
114 Py_ssize_t width;
115 int thousands_separators;
116 Py_ssize_t precision;
117 Py_UCS4 type;
118} InternalFormatSpec;
Eric Smith4a7d76d2008-05-30 18:10:19 +0000119
#if 0
/* Occasionally useful for debugging.  Should normally be compiled out. */
static void
DEBUG_PRINT_FORMAT_SPEC(InternalFormatSpec *format)
{
    /* The Py_UCS4 fields are cast to int so that every argument has
       exactly the type its %d / %c conversion expects. */
    printf("internal format spec: fill_char %d\n", (int)format->fill_char);
    printf("internal format spec: align %d\n", (int)format->align);
    printf("internal format spec: alternate %d\n", format->alternate);
    printf("internal format spec: sign %d\n", (int)format->sign);
    printf("internal format spec: width %zd\n", format->width);
    printf("internal format spec: thousands_separators %d\n",
           format->thousands_separators);
    printf("internal format spec: precision %zd\n", format->precision);
    printf("internal format spec: type %c\n", (int)format->type);
    printf("\n");
}
#endif
137
138
139/*
140 ptr points to the start of the format_spec, end points just past its end.
141 fills in format with the parsed information.
142 returns 1 on success, 0 on failure.
143 if failure, sets the exception
144*/
145static int
146parse_internal_render_format_spec(PyObject *format_spec,
147 Py_ssize_t start, Py_ssize_t end,
148 InternalFormatSpec *format,
149 char default_type,
150 char default_align)
151{
152 Py_ssize_t pos = start;
153 /* end-pos is used throughout this code to specify the length of
154 the input string */
155#define READ_spec(index) PyUnicode_READ_CHAR(format_spec, index)
156
157 Py_ssize_t consumed;
158 int align_specified = 0;
159
160 format->fill_char = '\0';
161 format->align = default_align;
162 format->alternate = 0;
163 format->sign = '\0';
164 format->width = -1;
165 format->thousands_separators = 0;
166 format->precision = -1;
167 format->type = default_type;
168
169 /* If the second char is an alignment token,
170 then parse the fill char */
171 if (end-pos >= 2 && is_alignment_token(READ_spec(pos+1))) {
172 format->align = READ_spec(pos+1);
173 format->fill_char = READ_spec(pos);
174 align_specified = 1;
175 pos += 2;
176 }
177 else if (end-pos >= 1 && is_alignment_token(READ_spec(pos))) {
178 format->align = READ_spec(pos);
179 align_specified = 1;
180 ++pos;
181 }
182
183 /* Parse the various sign options */
184 if (end-pos >= 1 && is_sign_element(READ_spec(pos))) {
185 format->sign = READ_spec(pos);
186 ++pos;
187 }
188
189 /* If the next character is #, we're in alternate mode. This only
190 applies to integers. */
191 if (end-pos >= 1 && READ_spec(pos) == '#') {
192 format->alternate = 1;
193 ++pos;
194 }
195
196 /* The special case for 0-padding (backwards compat) */
197 if (format->fill_char == '\0' && end-pos >= 1 && READ_spec(pos) == '0') {
198 format->fill_char = '0';
199 if (!align_specified) {
200 format->align = '=';
201 }
202 ++pos;
203 }
204
205 consumed = get_integer(format_spec, &pos, end, &format->width);
206 if (consumed == -1)
207 /* Overflow error. Exception already set. */
208 return 0;
209
210 /* If consumed is 0, we didn't consume any characters for the
211 width. In that case, reset the width to -1, because
212 get_integer() will have set it to zero. -1 is how we record
213 that the width wasn't specified. */
214 if (consumed == 0)
215 format->width = -1;
216
217 /* Comma signifies add thousands separators */
218 if (end-pos && READ_spec(pos) == ',') {
219 format->thousands_separators = 1;
220 ++pos;
221 }
222
223 /* Parse field precision */
224 if (end-pos && READ_spec(pos) == '.') {
225 ++pos;
226
227 consumed = get_integer(format_spec, &pos, end, &format->precision);
228 if (consumed == -1)
229 /* Overflow error. Exception already set. */
230 return 0;
231
232 /* Not having a precision after a dot is an error. */
233 if (consumed == 0) {
234 PyErr_Format(PyExc_ValueError,
235 "Format specifier missing precision");
236 return 0;
237 }
238
239 }
240
241 /* Finally, parse the type field. */
242
243 if (end-pos > 1) {
Eric V. Smithd25cfe62012-01-19 20:04:28 -0500244 /* More than one char remain, invalid format specifier. */
245 PyErr_Format(PyExc_ValueError, "Invalid format specifier");
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200246 return 0;
247 }
248
249 if (end-pos == 1) {
250 format->type = READ_spec(pos);
251 ++pos;
252 }
253
254 /* Do as much validating as we can, just by looking at the format
255 specifier. Do not take into account what type of formatting
256 we're doing (int, float, string). */
257
258 if (format->thousands_separators) {
259 switch (format->type) {
260 case 'd':
261 case 'e':
262 case 'f':
263 case 'g':
264 case 'E':
265 case 'G':
266 case '%':
267 case 'F':
268 case '\0':
269 /* These are allowed. See PEP 378.*/
270 break;
271 default:
272 invalid_comma_type(format->type);
273 return 0;
274 }
275 }
276
Victor Stinnera4ac6002012-01-21 15:50:49 +0100277 assert (format->align <= 127);
278 assert (format->sign <= 127);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200279 return 1;
280}
281
282/* Calculate the padding needed. */
283static void
284calc_padding(Py_ssize_t nchars, Py_ssize_t width, Py_UCS4 align,
285 Py_ssize_t *n_lpadding, Py_ssize_t *n_rpadding,
286 Py_ssize_t *n_total)
287{
288 if (width >= 0) {
289 if (nchars > width)
290 *n_total = nchars;
291 else
292 *n_total = width;
293 }
294 else {
295 /* not specified, use all of the chars and no more */
296 *n_total = nchars;
297 }
298
299 /* Figure out how much leading space we need, based on the
300 aligning */
301 if (align == '>')
302 *n_lpadding = *n_total - nchars;
303 else if (align == '^')
304 *n_lpadding = (*n_total - nchars) / 2;
305 else if (align == '<' || align == '=')
306 *n_lpadding = 0;
307 else {
308 /* We should never have an unspecified alignment. */
309 *n_lpadding = 0;
310 assert(0);
311 }
312
313 *n_rpadding = *n_total - nchars - *n_lpadding;
314}
315
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200316/* Do the padding, and return a pointer to where the caller-supplied
317 content goes. */
318static Py_ssize_t
319fill_padding(PyObject *s, Py_ssize_t start, Py_ssize_t nchars,
320 Py_UCS4 fill_char, Py_ssize_t n_lpadding,
321 Py_ssize_t n_rpadding)
322{
323 /* Pad on left. */
324 if (n_lpadding)
Victor Stinner3fe55312012-01-04 00:33:50 +0100325 PyUnicode_Fill(s, start, start + n_lpadding, fill_char);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200326
327 /* Pad on right. */
328 if (n_rpadding)
Victor Stinner3fe55312012-01-04 00:33:50 +0100329 PyUnicode_Fill(s, start + nchars + n_lpadding,
330 start + nchars + n_lpadding + n_rpadding, fill_char);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200331
332 /* Pointer to the user content. */
333 return start + n_lpadding;
334}
335
336/************************************************************************/
337/*********** common routines for numeric formatting *********************/
338/************************************************************************/
339
/* Locale type codes: they select where get_locale_info() takes the
   decimal point, thousands separator and grouping from. */
#define LT_CURRENT_LOCALE 0   /* the current C locale, via localeconv() */
#define LT_DEFAULT_LOCALE 1   /* hard-coded: '.', ',' and groups of 3 */
#define LT_NO_LOCALE 2        /* '.', empty separator, no grouping */
344
345/* Locale info needed for formatting integers and the part of floats
346 before and including the decimal. Note that locales only support
347 8-bit chars, not unicode. */
348typedef struct {
Victor Stinner41a863c2012-02-24 00:37:51 +0100349 PyObject *decimal_point;
350 PyObject *thousands_sep;
351 const char *grouping;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200352} LocaleInfo;
353
Victor Stinner41a863c2012-02-24 00:37:51 +0100354#define STATIC_LOCALE_INFO_INIT {0, 0, 0}
355
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200356/* describes the layout for an integer, see the comment in
357 calc_number_widths() for details */
358typedef struct {
359 Py_ssize_t n_lpadding;
360 Py_ssize_t n_prefix;
361 Py_ssize_t n_spadding;
362 Py_ssize_t n_rpadding;
363 char sign;
364 Py_ssize_t n_sign; /* number of digits needed for sign (0/1) */
365 Py_ssize_t n_grouped_digits; /* Space taken up by the digits, including
366 any grouping chars. */
367 Py_ssize_t n_decimal; /* 0 if only an integer */
368 Py_ssize_t n_remainder; /* Digits in decimal and/or exponent part,
369 excluding the decimal itself, if
370 present. */
371
372 /* These 2 are not the widths of fields, but are needed by
373 STRINGLIB_GROUPING. */
374 Py_ssize_t n_digits; /* The number of digits before a decimal
375 or exponent. */
376 Py_ssize_t n_min_width; /* The min_width we used when we computed
377 the n_grouped_digits width. */
378} NumberFieldWidths;
379
380
381/* Given a number of the form:
382 digits[remainder]
383 where ptr points to the start and end points to the end, find where
384 the integer part ends. This could be a decimal, an exponent, both,
385 or neither.
386 If a decimal point is present, set *has_decimal and increment
387 remainder beyond it.
388 Results are undefined (but shouldn't crash) for improperly
389 formatted strings.
390*/
391static void
392parse_number(PyObject *s, Py_ssize_t pos, Py_ssize_t end,
393 Py_ssize_t *n_remainder, int *has_decimal)
394{
395 Py_ssize_t remainder;
396
397 while (pos<end && isdigit(PyUnicode_READ_CHAR(s, pos)))
398 ++pos;
399 remainder = pos;
400
401 /* Does remainder start with a decimal point? */
402 *has_decimal = pos<end && PyUnicode_READ_CHAR(s, remainder) == '.';
403
404 /* Skip the decimal point. */
405 if (*has_decimal)
406 remainder++;
407
408 *n_remainder = end - remainder;
409}
410
411/* not all fields of format are used. for example, precision is
412 unused. should this take discrete params in order to be more clear
413 about what it does? or is passing a single format parameter easier
414 and more efficient enough to justify a little obfuscation? */
415static Py_ssize_t
416calc_number_widths(NumberFieldWidths *spec, Py_ssize_t n_prefix,
417 Py_UCS4 sign_char, PyObject *number, Py_ssize_t n_start,
418 Py_ssize_t n_end, Py_ssize_t n_remainder,
419 int has_decimal, const LocaleInfo *locale,
Victor Stinner41a863c2012-02-24 00:37:51 +0100420 const InternalFormatSpec *format, Py_UCS4 *maxchar)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200421{
422 Py_ssize_t n_non_digit_non_padding;
423 Py_ssize_t n_padding;
424
425 spec->n_digits = n_end - n_start - n_remainder - (has_decimal?1:0);
426 spec->n_lpadding = 0;
427 spec->n_prefix = n_prefix;
Victor Stinner41a863c2012-02-24 00:37:51 +0100428 spec->n_decimal = has_decimal ? PyUnicode_GET_LENGTH(locale->decimal_point) : 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200429 spec->n_remainder = n_remainder;
430 spec->n_spadding = 0;
431 spec->n_rpadding = 0;
432 spec->sign = '\0';
433 spec->n_sign = 0;
434
435 /* the output will look like:
436 | |
437 | <lpadding> <sign> <prefix> <spadding> <grouped_digits> <decimal> <remainder> <rpadding> |
438 | |
439
440 sign is computed from format->sign and the actual
441 sign of the number
442
443 prefix is given (it's for the '0x' prefix)
444
445 digits is already known
446
447 the total width is either given, or computed from the
448 actual digits
449
450 only one of lpadding, spadding, and rpadding can be non-zero,
451 and it's calculated from the width and other fields
452 */
453
454 /* compute the various parts we're going to write */
455 switch (format->sign) {
456 case '+':
457 /* always put a + or - */
458 spec->n_sign = 1;
459 spec->sign = (sign_char == '-' ? '-' : '+');
460 break;
461 case ' ':
462 spec->n_sign = 1;
463 spec->sign = (sign_char == '-' ? '-' : ' ');
464 break;
465 default:
466 /* Not specified, or the default (-) */
467 if (sign_char == '-') {
468 spec->n_sign = 1;
469 spec->sign = '-';
470 }
471 }
472
473 /* The number of chars used for non-digits and non-padding. */
474 n_non_digit_non_padding = spec->n_sign + spec->n_prefix + spec->n_decimal +
475 spec->n_remainder;
476
477 /* min_width can go negative, that's okay. format->width == -1 means
478 we don't care. */
479 if (format->fill_char == '0' && format->align == '=')
480 spec->n_min_width = format->width - n_non_digit_non_padding;
481 else
482 spec->n_min_width = 0;
483
484 if (spec->n_digits == 0)
485 /* This case only occurs when using 'c' formatting, we need
486 to special case it because the grouping code always wants
487 to have at least one character. */
488 spec->n_grouped_digits = 0;
Victor Stinner41a863c2012-02-24 00:37:51 +0100489 else {
490 Py_UCS4 grouping_maxchar;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200491 spec->n_grouped_digits = _PyUnicode_InsertThousandsGrouping(
Victor Stinner41a863c2012-02-24 00:37:51 +0100492 NULL, 0,
493 0, NULL,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200494 spec->n_digits, spec->n_min_width,
Victor Stinner41a863c2012-02-24 00:37:51 +0100495 locale->grouping, locale->thousands_sep, &grouping_maxchar);
496 *maxchar = Py_MAX(*maxchar, grouping_maxchar);
497 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200498
499 /* Given the desired width and the total of digit and non-digit
500 space we consume, see if we need any padding. format->width can
501 be negative (meaning no padding), but this code still works in
502 that case. */
503 n_padding = format->width -
504 (n_non_digit_non_padding + spec->n_grouped_digits);
505 if (n_padding > 0) {
506 /* Some padding is needed. Determine if it's left, space, or right. */
507 switch (format->align) {
508 case '<':
509 spec->n_rpadding = n_padding;
510 break;
511 case '^':
512 spec->n_lpadding = n_padding / 2;
513 spec->n_rpadding = n_padding - spec->n_lpadding;
514 break;
515 case '=':
516 spec->n_spadding = n_padding;
517 break;
518 case '>':
519 spec->n_lpadding = n_padding;
520 break;
521 default:
522 /* Shouldn't get here, but treat it as '>' */
523 spec->n_lpadding = n_padding;
524 assert(0);
525 break;
526 }
527 }
Victor Stinner41a863c2012-02-24 00:37:51 +0100528
529 if (spec->n_lpadding || spec->n_spadding || spec->n_rpadding)
530 *maxchar = Py_MAX(*maxchar, format->fill_char);
531
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200532 return spec->n_lpadding + spec->n_sign + spec->n_prefix +
533 spec->n_spadding + spec->n_grouped_digits + spec->n_decimal +
534 spec->n_remainder + spec->n_rpadding;
535}
536
537/* Fill in the digit parts of a numbers's string representation,
538 as determined in calc_number_widths().
Victor Stinnerafbaa202011-09-28 21:50:16 +0200539 Return -1 on error, or 0 on success. */
540static int
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200541fill_number(PyObject *out, Py_ssize_t pos, const NumberFieldWidths *spec,
542 PyObject *digits, Py_ssize_t d_start, Py_ssize_t d_end,
Victor Stinnerafbaa202011-09-28 21:50:16 +0200543 PyObject *prefix, Py_ssize_t p_start,
544 Py_UCS4 fill_char,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200545 LocaleInfo *locale, int toupper)
546{
547 /* Used to keep track of digits, decimal, and remainder. */
548 Py_ssize_t d_pos = d_start;
549 unsigned int kind = PyUnicode_KIND(out);
550 void *data = PyUnicode_DATA(out);
551
552#ifndef NDEBUG
553 Py_ssize_t r;
554#endif
555
556 if (spec->n_lpadding) {
Victor Stinner3fe55312012-01-04 00:33:50 +0100557 PyUnicode_Fill(out, pos, pos + spec->n_lpadding, fill_char);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200558 pos += spec->n_lpadding;
559 }
560 if (spec->n_sign == 1) {
561 PyUnicode_WRITE(kind, data, pos++, spec->sign);
562 }
563 if (spec->n_prefix) {
Victor Stinnerfd85c3a2011-09-28 21:53:49 +0200564 if (PyUnicode_CopyCharacters(out, pos,
565 prefix, p_start,
566 spec->n_prefix) < 0)
567 return -1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200568 if (toupper) {
569 Py_ssize_t t;
Benjamin Peterson21e0da22012-01-11 21:00:42 -0500570 for (t = 0; t < spec->n_prefix; t++) {
571 Py_UCS4 c = PyUnicode_READ(kind, data, pos + t);
Victor Stinnered277852012-02-01 00:22:23 +0100572 c = Py_TOUPPER(c);
Victor Stinnera4ac6002012-01-21 15:50:49 +0100573 assert (c <= 127);
Victor Stinnered277852012-02-01 00:22:23 +0100574 PyUnicode_WRITE(kind, data, pos + t, c);
Benjamin Peterson21e0da22012-01-11 21:00:42 -0500575 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200576 }
577 pos += spec->n_prefix;
578 }
579 if (spec->n_spadding) {
Victor Stinner3fe55312012-01-04 00:33:50 +0100580 PyUnicode_Fill(out, pos, pos + spec->n_spadding, fill_char);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200581 pos += spec->n_spadding;
582 }
583
584 /* Only for type 'c' special case, it has no digits. */
585 if (spec->n_digits != 0) {
586 /* Fill the digits with InsertThousandsGrouping. */
Victor Stinnerdba2dee2011-09-28 21:50:42 +0200587 char *pdigits;
588 if (PyUnicode_READY(digits))
589 return -1;
590 pdigits = PyUnicode_DATA(digits);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200591 if (PyUnicode_KIND(digits) < kind) {
592 pdigits = _PyUnicode_AsKind(digits, kind);
Victor Stinnerafbaa202011-09-28 21:50:16 +0200593 if (pdigits == NULL)
594 return -1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200595 }
596#ifndef NDEBUG
597 r =
598#endif
599 _PyUnicode_InsertThousandsGrouping(
Victor Stinner41a863c2012-02-24 00:37:51 +0100600 out, pos,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200601 spec->n_grouped_digits,
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200602 pdigits + kind * d_pos,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200603 spec->n_digits, spec->n_min_width,
Victor Stinner41a863c2012-02-24 00:37:51 +0100604 locale->grouping, locale->thousands_sep, NULL);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200605#ifndef NDEBUG
606 assert(r == spec->n_grouped_digits);
607#endif
608 if (PyUnicode_KIND(digits) < kind)
609 PyMem_Free(pdigits);
610 d_pos += spec->n_digits;
611 }
612 if (toupper) {
613 Py_ssize_t t;
Benjamin Peterson21e0da22012-01-11 21:00:42 -0500614 for (t = 0; t < spec->n_grouped_digits; t++) {
615 Py_UCS4 c = PyUnicode_READ(kind, data, pos + t);
Victor Stinnered277852012-02-01 00:22:23 +0100616 c = Py_TOUPPER(c);
Benjamin Peterson21e0da22012-01-11 21:00:42 -0500617 if (c > 127) {
618 PyErr_SetString(PyExc_SystemError, "non-ascii grouped digit");
619 return -1;
620 }
Victor Stinnered277852012-02-01 00:22:23 +0100621 PyUnicode_WRITE(kind, data, pos + t, c);
Benjamin Peterson21e0da22012-01-11 21:00:42 -0500622 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200623 }
624 pos += spec->n_grouped_digits;
625
626 if (spec->n_decimal) {
Victor Stinner41a863c2012-02-24 00:37:51 +0100627 if (PyUnicode_CopyCharacters(out, pos, locale->decimal_point, 0, spec->n_decimal) < 0)
628 return -1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200629 pos += spec->n_decimal;
630 d_pos += 1;
631 }
632
633 if (spec->n_remainder) {
Victor Stinnerfd85c3a2011-09-28 21:53:49 +0200634 if (PyUnicode_CopyCharacters(out, pos, digits, d_pos, spec->n_remainder) < 0)
635 return -1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200636 pos += spec->n_remainder;
637 d_pos += spec->n_remainder;
638 }
639
640 if (spec->n_rpadding) {
Victor Stinner3fe55312012-01-04 00:33:50 +0100641 PyUnicode_Fill(out, pos, pos + spec->n_rpadding, fill_char);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200642 pos += spec->n_rpadding;
643 }
Victor Stinnerafbaa202011-09-28 21:50:16 +0200644 return 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200645}
646
647static char no_grouping[1] = {CHAR_MAX};
648
649/* Find the decimal point character(s?), thousands_separator(s?), and
650 grouping description, either for the current locale if type is
651 LT_CURRENT_LOCALE, a hard-coded locale if LT_DEFAULT_LOCALE, or
652 none if LT_NO_LOCALE. */
Victor Stinner41a863c2012-02-24 00:37:51 +0100653static int
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200654get_locale_info(int type, LocaleInfo *locale_info)
655{
656 switch (type) {
657 case LT_CURRENT_LOCALE: {
658 struct lconv *locale_data = localeconv();
Victor Stinner41a863c2012-02-24 00:37:51 +0100659 locale_info->decimal_point = PyUnicode_DecodeLocale(
660 locale_data->decimal_point,
661 NULL);
662 if (locale_info->decimal_point == NULL)
663 return -1;
664 locale_info->thousands_sep = PyUnicode_DecodeLocale(
665 locale_data->thousands_sep,
666 NULL);
667 if (locale_info->thousands_sep == NULL) {
668 Py_DECREF(locale_info->decimal_point);
669 return -1;
670 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200671 locale_info->grouping = locale_data->grouping;
672 break;
673 }
674 case LT_DEFAULT_LOCALE:
Victor Stinner41a863c2012-02-24 00:37:51 +0100675 locale_info->decimal_point = PyUnicode_FromOrdinal('.');
676 locale_info->thousands_sep = PyUnicode_FromOrdinal(',');
677 if (!locale_info->decimal_point || !locale_info->thousands_sep) {
678 Py_XDECREF(locale_info->decimal_point);
679 Py_XDECREF(locale_info->thousands_sep);
680 return -1;
681 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200682 locale_info->grouping = "\3"; /* Group every 3 characters. The
683 (implicit) trailing 0 means repeat
684 infinitely. */
685 break;
686 case LT_NO_LOCALE:
Victor Stinner41a863c2012-02-24 00:37:51 +0100687 locale_info->decimal_point = PyUnicode_FromOrdinal('.');
688 locale_info->thousands_sep = PyUnicode_New(0, 0);
689 if (!locale_info->decimal_point || !locale_info->thousands_sep) {
690 Py_XDECREF(locale_info->decimal_point);
691 Py_XDECREF(locale_info->thousands_sep);
692 return -1;
693 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200694 locale_info->grouping = no_grouping;
695 break;
696 default:
697 assert(0);
698 }
Victor Stinner41a863c2012-02-24 00:37:51 +0100699 return 0;
700}
701
702static void
703free_locale_info(LocaleInfo *locale_info)
704{
705 Py_XDECREF(locale_info->decimal_point);
706 Py_XDECREF(locale_info->thousands_sep);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200707}
708
709/************************************************************************/
710/*********** string formatting ******************************************/
711/************************************************************************/
712
713static PyObject *
714format_string_internal(PyObject *value, const InternalFormatSpec *format)
715{
716 Py_ssize_t lpad;
717 Py_ssize_t rpad;
718 Py_ssize_t total;
719 Py_ssize_t pos;
Victor Stinnerc4f281e2011-10-11 22:11:42 +0200720 Py_ssize_t len = PyUnicode_GET_LENGTH(value);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200721 PyObject *result = NULL;
Amaury Forgeot d'Arccd27df32012-01-23 22:42:19 +0100722 Py_UCS4 maxchar = 127;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200723
724 /* sign is not allowed on strings */
725 if (format->sign != '\0') {
726 PyErr_SetString(PyExc_ValueError,
727 "Sign not allowed in string format specifier");
728 goto done;
729 }
730
731 /* alternate is not allowed on strings */
732 if (format->alternate) {
733 PyErr_SetString(PyExc_ValueError,
734 "Alternate form (#) not allowed in string format "
735 "specifier");
736 goto done;
737 }
738
739 /* '=' alignment not allowed on strings */
740 if (format->align == '=') {
741 PyErr_SetString(PyExc_ValueError,
742 "'=' alignment not allowed "
743 "in string format specifier");
744 goto done;
745 }
746
747 /* if precision is specified, output no more that format.precision
748 characters */
749 if (format->precision >= 0 && len >= format->precision) {
750 len = format->precision;
751 }
752
753 calc_padding(len, format->width, format->align, &lpad, &rpad, &total);
754
Victor Stinnera4ac6002012-01-21 15:50:49 +0100755 if (lpad != 0 || rpad != 0)
756 maxchar = Py_MAX(maxchar, format->fill_char);
757
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200758 /* allocate the resulting string */
759 result = PyUnicode_New(total, maxchar);
760 if (result == NULL)
761 goto done;
762
763 /* Write into that space. First the padding. */
764 pos = fill_padding(result, 0, len,
765 format->fill_char=='\0'?' ':format->fill_char,
766 lpad, rpad);
767
768 /* Then the source string. */
Victor Stinnerfd85c3a2011-09-28 21:53:49 +0200769 if (PyUnicode_CopyCharacters(result, pos, value, 0, len) < 0)
770 Py_CLEAR(result);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200771
772done:
Victor Stinnered277852012-02-01 00:22:23 +0100773 assert(!result || _PyUnicode_CheckConsistency(result, 1));
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200774 return result;
775}
776
777
778/************************************************************************/
779/*********** long formatting ********************************************/
780/************************************************************************/
781
782typedef PyObject*
783(*IntOrLongToString)(PyObject *value, int base);
784
785static PyObject *
786format_int_or_long_internal(PyObject *value, const InternalFormatSpec *format,
787 IntOrLongToString tostring)
788{
789 PyObject *result = NULL;
Amaury Forgeot d'Arccd27df32012-01-23 22:42:19 +0100790 Py_UCS4 maxchar = 127;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200791 PyObject *tmp = NULL;
792 Py_ssize_t inumeric_chars;
793 Py_UCS4 sign_char = '\0';
794 Py_ssize_t n_digits; /* count of digits need from the computed
795 string */
796 Py_ssize_t n_remainder = 0; /* Used only for 'c' formatting, which
797 produces non-digits */
798 Py_ssize_t n_prefix = 0; /* Count of prefix chars, (e.g., '0x') */
799 Py_ssize_t n_total;
Victor Stinnered277852012-02-01 00:22:23 +0100800 Py_ssize_t prefix = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200801 NumberFieldWidths spec;
802 long x;
Victor Stinnerafbaa202011-09-28 21:50:16 +0200803 int err;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200804
805 /* Locale settings, either from the actual locale or
806 from a hard-code pseudo-locale */
Victor Stinner41a863c2012-02-24 00:37:51 +0100807 LocaleInfo locale = STATIC_LOCALE_INFO_INIT;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200808
809 /* no precision allowed on integers */
810 if (format->precision != -1) {
811 PyErr_SetString(PyExc_ValueError,
812 "Precision not allowed in integer format specifier");
813 goto done;
814 }
815
816 /* special case for character formatting */
817 if (format->type == 'c') {
818 /* error to specify a sign */
819 if (format->sign != '\0') {
820 PyErr_SetString(PyExc_ValueError,
821 "Sign not allowed with integer"
822 " format specifier 'c'");
823 goto done;
824 }
825
826 /* taken from unicodeobject.c formatchar() */
827 /* Integer input truncated to a character */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200828 x = PyLong_AsLong(value);
829 if (x == -1 && PyErr_Occurred())
830 goto done;
831 if (x < 0 || x > 0x10ffff) {
832 PyErr_SetString(PyExc_OverflowError,
Victor Stinnera4ac6002012-01-21 15:50:49 +0100833 "%c arg not in range(0x110000)");
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200834 goto done;
835 }
836 tmp = PyUnicode_FromOrdinal(x);
837 inumeric_chars = 0;
838 n_digits = 1;
Amaury Forgeot d'Arc6d766fc2012-01-23 23:20:43 +0100839 maxchar = Py_MAX(maxchar, (Py_UCS4)x);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200840
841 /* As a sort-of hack, we tell calc_number_widths that we only
842 have "remainder" characters. calc_number_widths thinks
843 these are characters that don't get formatted, only copied
844 into the output string. We do this for 'c' formatting,
845 because the characters are likely to be non-digits. */
846 n_remainder = 1;
847 }
848 else {
849 int base;
850 int leading_chars_to_skip = 0; /* Number of characters added by
851 PyNumber_ToBase that we want to
852 skip over. */
853
854 /* Compute the base and how many characters will be added by
855 PyNumber_ToBase */
856 switch (format->type) {
857 case 'b':
858 base = 2;
859 leading_chars_to_skip = 2; /* 0b */
860 break;
861 case 'o':
862 base = 8;
863 leading_chars_to_skip = 2; /* 0o */
864 break;
865 case 'x':
866 case 'X':
867 base = 16;
868 leading_chars_to_skip = 2; /* 0x */
869 break;
870 default: /* shouldn't be needed, but stops a compiler warning */
871 case 'd':
872 case 'n':
873 base = 10;
874 break;
875 }
876
877 /* The number of prefix chars is the same as the leading
878 chars to skip */
879 if (format->alternate)
880 n_prefix = leading_chars_to_skip;
881
882 /* Do the hard part, converting to a string in a given base */
883 tmp = tostring(value, base);
884 if (tmp == NULL || PyUnicode_READY(tmp) == -1)
885 goto done;
886
887 inumeric_chars = 0;
888 n_digits = PyUnicode_GET_LENGTH(tmp);
889
890 prefix = inumeric_chars;
891
892 /* Is a sign character present in the output? If so, remember it
893 and skip it */
894 if (PyUnicode_READ_CHAR(tmp, inumeric_chars) == '-') {
895 sign_char = '-';
896 ++prefix;
897 ++leading_chars_to_skip;
898 }
899
900 /* Skip over the leading chars (0x, 0b, etc.) */
901 n_digits -= leading_chars_to_skip;
902 inumeric_chars += leading_chars_to_skip;
903 }
904
905 /* Determine the grouping, separator, and decimal point, if any. */
Victor Stinner41a863c2012-02-24 00:37:51 +0100906 if (get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
907 (format->thousands_separators ?
908 LT_DEFAULT_LOCALE :
909 LT_NO_LOCALE),
910 &locale) == -1)
911 goto done;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200912
913 /* Calculate how much memory we'll need. */
914 n_total = calc_number_widths(&spec, n_prefix, sign_char, tmp, inumeric_chars,
Victor Stinner41a863c2012-02-24 00:37:51 +0100915 inumeric_chars + n_digits, n_remainder, 0,
916 &locale, format, &maxchar);
Victor Stinnera4ac6002012-01-21 15:50:49 +0100917
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200918 /* Allocate the memory. */
919 result = PyUnicode_New(n_total, maxchar);
920 if (!result)
921 goto done;
922
923 /* Populate the memory. */
Victor Stinnerafbaa202011-09-28 21:50:16 +0200924 err = fill_number(result, 0, &spec,
925 tmp, inumeric_chars, inumeric_chars + n_digits,
926 tmp, prefix,
927 format->fill_char == '\0' ? ' ' : format->fill_char,
928 &locale, format->type == 'X');
929 if (err)
930 Py_CLEAR(result);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200931
932done:
933 Py_XDECREF(tmp);
Victor Stinner41a863c2012-02-24 00:37:51 +0100934 free_locale_info(&locale);
Victor Stinnered277852012-02-01 00:22:23 +0100935 assert(!result || _PyUnicode_CheckConsistency(result, 1));
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200936 return result;
937}
938
939/************************************************************************/
940/*********** float formatting *******************************************/
941/************************************************************************/
942
943static PyObject*
944strtounicode(char *charbuffer, Py_ssize_t len)
945{
946 return PyUnicode_FromKindAndData(PyUnicode_1BYTE_KIND, charbuffer, len);
947}
948
949/* much of this is taken from unicodeobject.c */
950static PyObject *
951format_float_internal(PyObject *value,
952 const InternalFormatSpec *format)
953{
954 char *buf = NULL; /* buffer returned from PyOS_double_to_string */
955 Py_ssize_t n_digits;
956 Py_ssize_t n_remainder;
957 Py_ssize_t n_total;
958 int has_decimal;
959 double val;
960 Py_ssize_t precision = format->precision;
961 Py_ssize_t default_precision = 6;
962 Py_UCS4 type = format->type;
963 int add_pct = 0;
964 Py_ssize_t index;
965 NumberFieldWidths spec;
966 int flags = 0;
967 PyObject *result = NULL;
Amaury Forgeot d'Arccd27df32012-01-23 22:42:19 +0100968 Py_UCS4 maxchar = 127;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200969 Py_UCS4 sign_char = '\0';
970 int float_type; /* Used to see if we have a nan, inf, or regular float. */
971 PyObject *unicode_tmp = NULL;
Victor Stinnerafbaa202011-09-28 21:50:16 +0200972 int err;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200973
974 /* Locale settings, either from the actual locale or
975 from a hard-code pseudo-locale */
Victor Stinner41a863c2012-02-24 00:37:51 +0100976 LocaleInfo locale = STATIC_LOCALE_INFO_INIT;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200977
978 if (format->alternate)
979 flags |= Py_DTSF_ALT;
980
981 if (type == '\0') {
982 /* Omitted type specifier. Behaves in the same way as repr(x)
983 and str(x) if no precision is given, else like 'g', but with
984 at least one digit after the decimal point. */
985 flags |= Py_DTSF_ADD_DOT_0;
986 type = 'r';
987 default_precision = 0;
988 }
989
990 if (type == 'n')
991 /* 'n' is the same as 'g', except for the locale used to
992 format the result. We take care of that later. */
993 type = 'g';
994
995 val = PyFloat_AsDouble(value);
996 if (val == -1.0 && PyErr_Occurred())
997 goto done;
998
999 if (type == '%') {
1000 type = 'f';
1001 val *= 100;
1002 add_pct = 1;
1003 }
1004
1005 if (precision < 0)
1006 precision = default_precision;
1007 else if (type == 'r')
1008 type = 'g';
1009
1010 /* Cast "type", because if we're in unicode we need to pass a
1011 8-bit char. This is safe, because we've restricted what "type"
1012 can be. */
1013 buf = PyOS_double_to_string(val, (char)type, precision, flags,
1014 &float_type);
1015 if (buf == NULL)
1016 goto done;
1017 n_digits = strlen(buf);
1018
1019 if (add_pct) {
1020 /* We know that buf has a trailing zero (since we just called
1021 strlen() on it), and we don't use that fact any more. So we
1022 can just write over the trailing zero. */
1023 buf[n_digits] = '%';
1024 n_digits += 1;
1025 }
1026
1027 /* Since there is no unicode version of PyOS_double_to_string,
1028 just use the 8 bit version and then convert to unicode. */
1029 unicode_tmp = strtounicode(buf, n_digits);
1030 if (unicode_tmp == NULL)
1031 goto done;
1032 index = 0;
1033
1034 /* Is a sign character present in the output? If so, remember it
1035 and skip it */
1036 if (PyUnicode_READ_CHAR(unicode_tmp, index) == '-') {
1037 sign_char = '-';
1038 ++index;
1039 --n_digits;
1040 }
1041
1042 /* Determine if we have any "remainder" (after the digits, might include
1043 decimal or exponent or both (or neither)) */
1044 parse_number(unicode_tmp, index, index + n_digits, &n_remainder, &has_decimal);
1045
1046 /* Determine the grouping, separator, and decimal point, if any. */
Victor Stinner41a863c2012-02-24 00:37:51 +01001047 if (get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
1048 (format->thousands_separators ?
1049 LT_DEFAULT_LOCALE :
1050 LT_NO_LOCALE),
1051 &locale) == -1)
1052 goto done;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001053
1054 /* Calculate how much memory we'll need. */
Victor Stinnerafbaa202011-09-28 21:50:16 +02001055 n_total = calc_number_widths(&spec, 0, sign_char, unicode_tmp, index,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001056 index + n_digits, n_remainder, has_decimal,
Victor Stinner41a863c2012-02-24 00:37:51 +01001057 &locale, format, &maxchar);
Victor Stinnera4ac6002012-01-21 15:50:49 +01001058
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001059 /* Allocate the memory. */
1060 result = PyUnicode_New(n_total, maxchar);
1061 if (result == NULL)
1062 goto done;
1063
1064 /* Populate the memory. */
Victor Stinnerafbaa202011-09-28 21:50:16 +02001065 err = fill_number(result, 0, &spec,
1066 unicode_tmp, index, index + n_digits,
1067 NULL, 0,
1068 format->fill_char == '\0' ? ' ' : format->fill_char,
1069 &locale, 0);
1070 if (err)
1071 Py_CLEAR(result);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001072
1073done:
1074 PyMem_Free(buf);
1075 Py_DECREF(unicode_tmp);
Victor Stinner41a863c2012-02-24 00:37:51 +01001076 free_locale_info(&locale);
Victor Stinnered277852012-02-01 00:22:23 +01001077 assert(!result || _PyUnicode_CheckConsistency(result, 1));
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001078 return result;
1079}
1080
1081/************************************************************************/
1082/*********** complex formatting *****************************************/
1083/************************************************************************/
1084
1085static PyObject *
1086format_complex_internal(PyObject *value,
1087 const InternalFormatSpec *format)
1088{
1089 double re;
1090 double im;
1091 char *re_buf = NULL; /* buffer returned from PyOS_double_to_string */
1092 char *im_buf = NULL; /* buffer returned from PyOS_double_to_string */
1093
1094 InternalFormatSpec tmp_format = *format;
1095 Py_ssize_t n_re_digits;
1096 Py_ssize_t n_im_digits;
1097 Py_ssize_t n_re_remainder;
1098 Py_ssize_t n_im_remainder;
1099 Py_ssize_t n_re_total;
1100 Py_ssize_t n_im_total;
1101 int re_has_decimal;
1102 int im_has_decimal;
1103 Py_ssize_t precision = format->precision;
1104 Py_ssize_t default_precision = 6;
1105 Py_UCS4 type = format->type;
1106 Py_ssize_t i_re;
1107 Py_ssize_t i_im;
1108 NumberFieldWidths re_spec;
1109 NumberFieldWidths im_spec;
1110 int flags = 0;
1111 PyObject *result = NULL;
Amaury Forgeot d'Arccd27df32012-01-23 22:42:19 +01001112 Py_UCS4 maxchar = 127;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001113 int rkind;
1114 void *rdata;
1115 Py_ssize_t index;
1116 Py_UCS4 re_sign_char = '\0';
1117 Py_UCS4 im_sign_char = '\0';
1118 int re_float_type; /* Used to see if we have a nan, inf, or regular float. */
1119 int im_float_type;
1120 int add_parens = 0;
1121 int skip_re = 0;
1122 Py_ssize_t lpad;
1123 Py_ssize_t rpad;
1124 Py_ssize_t total;
1125 PyObject *re_unicode_tmp = NULL;
1126 PyObject *im_unicode_tmp = NULL;
Victor Stinnerafbaa202011-09-28 21:50:16 +02001127 int err;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001128
1129 /* Locale settings, either from the actual locale or
1130 from a hard-code pseudo-locale */
Victor Stinner41a863c2012-02-24 00:37:51 +01001131 LocaleInfo locale = STATIC_LOCALE_INFO_INIT;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001132
1133 /* Zero padding is not allowed. */
1134 if (format->fill_char == '0') {
1135 PyErr_SetString(PyExc_ValueError,
1136 "Zero padding is not allowed in complex format "
1137 "specifier");
1138 goto done;
1139 }
1140
1141 /* Neither is '=' alignment . */
1142 if (format->align == '=') {
1143 PyErr_SetString(PyExc_ValueError,
1144 "'=' alignment flag is not allowed in complex format "
1145 "specifier");
1146 goto done;
1147 }
1148
1149 re = PyComplex_RealAsDouble(value);
1150 if (re == -1.0 && PyErr_Occurred())
1151 goto done;
1152 im = PyComplex_ImagAsDouble(value);
1153 if (im == -1.0 && PyErr_Occurred())
1154 goto done;
1155
1156 if (format->alternate)
1157 flags |= Py_DTSF_ALT;
1158
1159 if (type == '\0') {
1160 /* Omitted type specifier. Should be like str(self). */
1161 type = 'r';
1162 default_precision = 0;
1163 if (re == 0.0 && copysign(1.0, re) == 1.0)
1164 skip_re = 1;
1165 else
1166 add_parens = 1;
1167 }
1168
1169 if (type == 'n')
1170 /* 'n' is the same as 'g', except for the locale used to
1171 format the result. We take care of that later. */
1172 type = 'g';
1173
1174 if (precision < 0)
1175 precision = default_precision;
1176 else if (type == 'r')
1177 type = 'g';
1178
1179 /* Cast "type", because if we're in unicode we need to pass a
1180 8-bit char. This is safe, because we've restricted what "type"
1181 can be. */
1182 re_buf = PyOS_double_to_string(re, (char)type, precision, flags,
1183 &re_float_type);
1184 if (re_buf == NULL)
1185 goto done;
1186 im_buf = PyOS_double_to_string(im, (char)type, precision, flags,
1187 &im_float_type);
1188 if (im_buf == NULL)
1189 goto done;
1190
1191 n_re_digits = strlen(re_buf);
1192 n_im_digits = strlen(im_buf);
1193
1194 /* Since there is no unicode version of PyOS_double_to_string,
1195 just use the 8 bit version and then convert to unicode. */
1196 re_unicode_tmp = strtounicode(re_buf, n_re_digits);
1197 if (re_unicode_tmp == NULL)
1198 goto done;
1199 i_re = 0;
1200
1201 im_unicode_tmp = strtounicode(im_buf, n_im_digits);
1202 if (im_unicode_tmp == NULL)
1203 goto done;
1204 i_im = 0;
1205
1206 /* Is a sign character present in the output? If so, remember it
1207 and skip it */
1208 if (PyUnicode_READ_CHAR(re_unicode_tmp, i_re) == '-') {
1209 re_sign_char = '-';
1210 ++i_re;
1211 --n_re_digits;
1212 }
1213 if (PyUnicode_READ_CHAR(im_unicode_tmp, i_im) == '-') {
1214 im_sign_char = '-';
1215 ++i_im;
1216 --n_im_digits;
1217 }
1218
1219 /* Determine if we have any "remainder" (after the digits, might include
1220 decimal or exponent or both (or neither)) */
Victor Stinnerafbaa202011-09-28 21:50:16 +02001221 parse_number(re_unicode_tmp, i_re, i_re + n_re_digits,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001222 &n_re_remainder, &re_has_decimal);
Victor Stinnerafbaa202011-09-28 21:50:16 +02001223 parse_number(im_unicode_tmp, i_im, i_im + n_im_digits,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001224 &n_im_remainder, &im_has_decimal);
1225
1226 /* Determine the grouping, separator, and decimal point, if any. */
Victor Stinner41a863c2012-02-24 00:37:51 +01001227 if (get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
1228 (format->thousands_separators ?
1229 LT_DEFAULT_LOCALE :
1230 LT_NO_LOCALE),
1231 &locale) == -1)
1232 goto done;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001233
1234 /* Turn off any padding. We'll do it later after we've composed
1235 the numbers without padding. */
1236 tmp_format.fill_char = '\0';
1237 tmp_format.align = '<';
1238 tmp_format.width = -1;
1239
1240 /* Calculate how much memory we'll need. */
1241 n_re_total = calc_number_widths(&re_spec, 0, re_sign_char, re_unicode_tmp,
1242 i_re, i_re + n_re_digits, n_re_remainder,
Victor Stinner41a863c2012-02-24 00:37:51 +01001243 re_has_decimal, &locale, &tmp_format,
1244 &maxchar);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001245
1246 /* Same formatting, but always include a sign, unless the real part is
1247 * going to be omitted, in which case we use whatever sign convention was
1248 * requested by the original format. */
1249 if (!skip_re)
1250 tmp_format.sign = '+';
1251 n_im_total = calc_number_widths(&im_spec, 0, im_sign_char, im_unicode_tmp,
1252 i_im, i_im + n_im_digits, n_im_remainder,
Victor Stinner41a863c2012-02-24 00:37:51 +01001253 im_has_decimal, &locale, &tmp_format,
1254 &maxchar);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001255
1256 if (skip_re)
1257 n_re_total = 0;
1258
1259 /* Add 1 for the 'j', and optionally 2 for parens. */
1260 calc_padding(n_re_total + n_im_total + 1 + add_parens * 2,
1261 format->width, format->align, &lpad, &rpad, &total);
1262
Victor Stinner41a863c2012-02-24 00:37:51 +01001263 if (lpad || rpad)
Victor Stinnera4ac6002012-01-21 15:50:49 +01001264 maxchar = Py_MAX(maxchar, format->fill_char);
1265
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001266 result = PyUnicode_New(total, maxchar);
1267 if (result == NULL)
1268 goto done;
1269 rkind = PyUnicode_KIND(result);
1270 rdata = PyUnicode_DATA(result);
1271
1272 /* Populate the memory. First, the padding. */
1273 index = fill_padding(result, 0,
1274 n_re_total + n_im_total + 1 + add_parens * 2,
1275 format->fill_char=='\0' ? ' ' : format->fill_char,
1276 lpad, rpad);
1277
1278 if (add_parens)
1279 PyUnicode_WRITE(rkind, rdata, index++, '(');
1280
1281 if (!skip_re) {
Victor Stinnerafbaa202011-09-28 21:50:16 +02001282 err = fill_number(result, index, &re_spec,
1283 re_unicode_tmp, i_re, i_re + n_re_digits,
1284 NULL, 0,
1285 0,
1286 &locale, 0);
1287 if (err) {
1288 Py_CLEAR(result);
1289 goto done;
1290 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001291 index += n_re_total;
1292 }
Victor Stinnerafbaa202011-09-28 21:50:16 +02001293 err = fill_number(result, index, &im_spec,
1294 im_unicode_tmp, i_im, i_im + n_im_digits,
1295 NULL, 0,
1296 0,
1297 &locale, 0);
1298 if (err) {
1299 Py_CLEAR(result);
1300 goto done;
1301 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001302 index += n_im_total;
1303 PyUnicode_WRITE(rkind, rdata, index++, 'j');
1304
1305 if (add_parens)
1306 PyUnicode_WRITE(rkind, rdata, index++, ')');
1307
1308done:
1309 PyMem_Free(re_buf);
1310 PyMem_Free(im_buf);
1311 Py_XDECREF(re_unicode_tmp);
1312 Py_XDECREF(im_unicode_tmp);
Victor Stinner41a863c2012-02-24 00:37:51 +01001313 free_locale_info(&locale);
Victor Stinnered277852012-02-01 00:22:23 +01001314 assert(!result || _PyUnicode_CheckConsistency(result, 1));
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001315 return result;
1316}
1317
1318/************************************************************************/
1319/*********** built in formatters ****************************************/
1320/************************************************************************/
1321PyObject *
1322_PyUnicode_FormatAdvanced(PyObject *obj,
1323 PyObject *format_spec,
1324 Py_ssize_t start, Py_ssize_t end)
1325{
1326 InternalFormatSpec format;
Victor Stinnerfb9ea8c2011-10-06 01:45:57 +02001327 PyObject *result;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001328
1329 /* check for the special case of zero length format spec, make
1330 it equivalent to str(obj) */
Victor Stinnerfb9ea8c2011-10-06 01:45:57 +02001331 if (start == end)
1332 return PyObject_Str(obj);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001333
1334 /* parse the format_spec */
1335 if (!parse_internal_render_format_spec(format_spec, start, end,
1336 &format, 's', '<'))
Victor Stinnerfb9ea8c2011-10-06 01:45:57 +02001337 return NULL;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001338
1339 /* type conversion? */
1340 switch (format.type) {
1341 case 's':
1342 /* no type conversion needed, already a string. do the formatting */
1343 result = format_string_internal(obj, &format);
Victor Stinnerfb9ea8c2011-10-06 01:45:57 +02001344 if (result != NULL)
1345 assert(_PyUnicode_CheckConsistency(result, 1));
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001346 break;
1347 default:
1348 /* unknown */
1349 unknown_presentation_type(format.type, obj->ob_type->tp_name);
Victor Stinnerfb9ea8c2011-10-06 01:45:57 +02001350 result = NULL;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001351 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001352 return result;
1353}
1354
1355static PyObject*
1356format_int_or_long(PyObject* obj, PyObject* format_spec,
1357 Py_ssize_t start, Py_ssize_t end,
1358 IntOrLongToString tostring)
1359{
1360 PyObject *result = NULL;
1361 PyObject *tmp = NULL;
1362 InternalFormatSpec format;
1363
1364 /* check for the special case of zero length format spec, make
1365 it equivalent to str(obj) */
1366 if (start == end) {
1367 result = PyObject_Str(obj);
1368 goto done;
1369 }
1370
1371 /* parse the format_spec */
1372 if (!parse_internal_render_format_spec(format_spec, start, end,
1373 &format, 'd', '>'))
1374 goto done;
1375
1376 /* type conversion? */
1377 switch (format.type) {
1378 case 'b':
1379 case 'c':
1380 case 'd':
1381 case 'o':
1382 case 'x':
1383 case 'X':
1384 case 'n':
1385 /* no type conversion needed, already an int (or long). do
1386 the formatting */
1387 result = format_int_or_long_internal(obj, &format, tostring);
1388 break;
1389
1390 case 'e':
1391 case 'E':
1392 case 'f':
1393 case 'F':
1394 case 'g':
1395 case 'G':
1396 case '%':
1397 /* convert to float */
1398 tmp = PyNumber_Float(obj);
1399 if (tmp == NULL)
1400 goto done;
1401 result = format_float_internal(tmp, &format);
1402 break;
1403
1404 default:
1405 /* unknown */
1406 unknown_presentation_type(format.type, obj->ob_type->tp_name);
1407 goto done;
1408 }
1409
1410done:
1411 Py_XDECREF(tmp);
1412 return result;
1413}
1414
1415/* Need to define long_format as a function that will convert a long
1416 to a string. In 3.0, _PyLong_Format has the correct signature. */
1417#define long_format _PyLong_Format
1418
1419PyObject *
1420_PyLong_FormatAdvanced(PyObject *obj,
1421 PyObject *format_spec,
1422 Py_ssize_t start, Py_ssize_t end)
1423{
1424 return format_int_or_long(obj, format_spec, start, end,
1425 long_format);
1426}
1427
1428PyObject *
1429_PyFloat_FormatAdvanced(PyObject *obj,
1430 PyObject *format_spec,
1431 Py_ssize_t start, Py_ssize_t end)
1432{
1433 PyObject *result = NULL;
1434 InternalFormatSpec format;
1435
1436 /* check for the special case of zero length format spec, make
1437 it equivalent to str(obj) */
1438 if (start == end) {
1439 result = PyObject_Str(obj);
1440 goto done;
1441 }
1442
1443 /* parse the format_spec */
1444 if (!parse_internal_render_format_spec(format_spec, start, end,
1445 &format, '\0', '>'))
1446 goto done;
1447
1448 /* type conversion? */
1449 switch (format.type) {
1450 case '\0': /* No format code: like 'g', but with at least one decimal. */
1451 case 'e':
1452 case 'E':
1453 case 'f':
1454 case 'F':
1455 case 'g':
1456 case 'G':
1457 case 'n':
1458 case '%':
1459 /* no conversion, already a float. do the formatting */
1460 result = format_float_internal(obj, &format);
1461 break;
1462
1463 default:
1464 /* unknown */
1465 unknown_presentation_type(format.type, obj->ob_type->tp_name);
1466 goto done;
1467 }
1468
1469done:
1470 return result;
1471}
1472
1473PyObject *
1474_PyComplex_FormatAdvanced(PyObject *obj,
1475 PyObject *format_spec,
1476 Py_ssize_t start, Py_ssize_t end)
1477{
1478 PyObject *result = NULL;
1479 InternalFormatSpec format;
1480
1481 /* check for the special case of zero length format spec, make
1482 it equivalent to str(obj) */
1483 if (start == end) {
1484 result = PyObject_Str(obj);
1485 goto done;
1486 }
1487
1488 /* parse the format_spec */
1489 if (!parse_internal_render_format_spec(format_spec, start, end,
1490 &format, '\0', '>'))
1491 goto done;
1492
1493 /* type conversion? */
1494 switch (format.type) {
1495 case '\0': /* No format code: like 'g', but with at least one decimal. */
1496 case 'e':
1497 case 'E':
1498 case 'f':
1499 case 'F':
1500 case 'g':
1501 case 'G':
1502 case 'n':
1503 /* no conversion, already a complex. do the formatting */
1504 result = format_complex_internal(obj, &format);
1505 break;
1506
1507 default:
1508 /* unknown */
1509 unknown_presentation_type(format.type, obj->ob_type->tp_name);
1510 goto done;
1511 }
1512
1513done:
1514 return result;
1515}