blob: e3a814984151b6e85c9afec1c2a090bee0b22513 [file] [log] [blame]
/* implements the unicode (as opposed to string) version of the
   built-in formatters for string, int, float.  that is, the versions
   of int.__format__, etc., that take and return unicode objects */
4
5#include "Python.h"
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02006#include <locale.h>
7
8/* Raises an exception about an unknown presentation type for this
9 * type. */
10
11static void
12unknown_presentation_type(Py_UCS4 presentation_type,
13 const char* type_name)
14{
15 /* %c might be out-of-range, hence the two cases. */
16 if (presentation_type > 32 && presentation_type < 128)
17 PyErr_Format(PyExc_ValueError,
18 "Unknown format code '%c' "
19 "for object of type '%.200s'",
20 (char)presentation_type,
21 type_name);
22 else
23 PyErr_Format(PyExc_ValueError,
24 "Unknown format code '\\x%x' "
25 "for object of type '%.200s'",
26 (unsigned int)presentation_type,
27 type_name);
28}
29
30static void
31invalid_comma_type(Py_UCS4 presentation_type)
32{
33 if (presentation_type > 32 && presentation_type < 128)
34 PyErr_Format(PyExc_ValueError,
35 "Cannot specify ',' with '%c'.",
36 (char)presentation_type);
37 else
38 PyErr_Format(PyExc_ValueError,
39 "Cannot specify ',' with '\\x%x'.",
40 (unsigned int)presentation_type);
41}
42
43/*
44 get_integer consumes 0 or more decimal digit characters from an
45 input string, updates *result with the corresponding positive
46 integer, and returns the number of digits consumed.
47
48 returns -1 on error.
49*/
50static int
51get_integer(PyObject *str, Py_ssize_t *pos, Py_ssize_t end,
52 Py_ssize_t *result)
53{
Mark Dickinson47862d42011-12-01 15:27:04 +000054 Py_ssize_t accumulator, digitval;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020055 int numdigits;
56 accumulator = numdigits = 0;
57 for (;;(*pos)++, numdigits++) {
58 if (*pos >= end)
59 break;
60 digitval = Py_UNICODE_TODECIMAL(PyUnicode_READ_CHAR(str, *pos));
61 if (digitval < 0)
62 break;
63 /*
Mark Dickinson47862d42011-12-01 15:27:04 +000064 Detect possible overflow before it happens:
65
66 accumulator * 10 + digitval > PY_SSIZE_T_MAX if and only if
67 accumulator > (PY_SSIZE_T_MAX - digitval) / 10.
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020068 */
Mark Dickinson47862d42011-12-01 15:27:04 +000069 if (accumulator > (PY_SSIZE_T_MAX - digitval) / 10) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020070 PyErr_Format(PyExc_ValueError,
71 "Too many decimal digits in format string");
72 return -1;
73 }
Mark Dickinson47862d42011-12-01 15:27:04 +000074 accumulator = accumulator * 10 + digitval;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020075 }
76 *result = accumulator;
77 return numdigits;
78}
79
80/************************************************************************/
81/*********** standard format specifier parsing **************************/
82/************************************************************************/
83
84/* returns true if this character is a specifier alignment token */
85Py_LOCAL_INLINE(int)
86is_alignment_token(Py_UCS4 c)
87{
88 switch (c) {
89 case '<': case '>': case '=': case '^':
90 return 1;
91 default:
92 return 0;
93 }
94}
95
96/* returns true if this character is a sign element */
97Py_LOCAL_INLINE(int)
98is_sign_element(Py_UCS4 c)
99{
100 switch (c) {
101 case ' ': case '+': case '-':
102 return 1;
103 default:
104 return 0;
105 }
106}
Eric Smith8c663262007-08-25 02:26:07 +0000107
Eric Smith4a7d76d2008-05-30 18:10:19 +0000108
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200109typedef struct {
110 Py_UCS4 fill_char;
111 Py_UCS4 align;
112 int alternate;
113 Py_UCS4 sign;
114 Py_ssize_t width;
115 int thousands_separators;
116 Py_ssize_t precision;
117 Py_UCS4 type;
118} InternalFormatSpec;
Eric Smith4a7d76d2008-05-30 18:10:19 +0000119
#if 0
/* Occasionally useful for debugging.  Should normally be compiled out.
   Dumps every field of *format to stdout, one per line. */
static void
DEBUG_PRINT_FORMAT_SPEC(InternalFormatSpec *format)
{
    printf("internal format spec: fill_char %d\n"
           "internal format spec: align %d\n"
           "internal format spec: alternate %d\n"
           "internal format spec: sign %d\n"
           "internal format spec: width %zd\n"
           "internal format spec: thousands_separators %d\n"
           "internal format spec: precision %zd\n"
           "internal format spec: type %c\n"
           "\n",
           format->fill_char,
           format->align,
           format->alternate,
           format->sign,
           format->width,
           format->thousands_separators,
           format->precision,
           format->type);
}
#endif
137
138
139/*
140 ptr points to the start of the format_spec, end points just past its end.
141 fills in format with the parsed information.
142 returns 1 on success, 0 on failure.
143 if failure, sets the exception
144*/
145static int
146parse_internal_render_format_spec(PyObject *format_spec,
147 Py_ssize_t start, Py_ssize_t end,
148 InternalFormatSpec *format,
149 char default_type,
150 char default_align)
151{
152 Py_ssize_t pos = start;
153 /* end-pos is used throughout this code to specify the length of
154 the input string */
155#define READ_spec(index) PyUnicode_READ_CHAR(format_spec, index)
156
157 Py_ssize_t consumed;
158 int align_specified = 0;
Eric V. Smith2ea97122014-04-14 11:55:10 -0400159 int fill_char_specified = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200160
Eric V. Smith2ea97122014-04-14 11:55:10 -0400161 format->fill_char = ' ';
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200162 format->align = default_align;
163 format->alternate = 0;
164 format->sign = '\0';
165 format->width = -1;
166 format->thousands_separators = 0;
167 format->precision = -1;
168 format->type = default_type;
169
170 /* If the second char is an alignment token,
171 then parse the fill char */
172 if (end-pos >= 2 && is_alignment_token(READ_spec(pos+1))) {
173 format->align = READ_spec(pos+1);
174 format->fill_char = READ_spec(pos);
Eric V. Smith2ea97122014-04-14 11:55:10 -0400175 fill_char_specified = 1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200176 align_specified = 1;
177 pos += 2;
178 }
179 else if (end-pos >= 1 && is_alignment_token(READ_spec(pos))) {
180 format->align = READ_spec(pos);
181 align_specified = 1;
182 ++pos;
183 }
184
185 /* Parse the various sign options */
186 if (end-pos >= 1 && is_sign_element(READ_spec(pos))) {
187 format->sign = READ_spec(pos);
188 ++pos;
189 }
190
191 /* If the next character is #, we're in alternate mode. This only
192 applies to integers. */
193 if (end-pos >= 1 && READ_spec(pos) == '#') {
194 format->alternate = 1;
195 ++pos;
196 }
197
198 /* The special case for 0-padding (backwards compat) */
Eric V. Smith2ea97122014-04-14 11:55:10 -0400199 if (!fill_char_specified && end-pos >= 1 && READ_spec(pos) == '0') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200200 format->fill_char = '0';
201 if (!align_specified) {
202 format->align = '=';
203 }
204 ++pos;
205 }
206
207 consumed = get_integer(format_spec, &pos, end, &format->width);
208 if (consumed == -1)
209 /* Overflow error. Exception already set. */
210 return 0;
211
212 /* If consumed is 0, we didn't consume any characters for the
213 width. In that case, reset the width to -1, because
214 get_integer() will have set it to zero. -1 is how we record
215 that the width wasn't specified. */
216 if (consumed == 0)
217 format->width = -1;
218
219 /* Comma signifies add thousands separators */
220 if (end-pos && READ_spec(pos) == ',') {
221 format->thousands_separators = 1;
222 ++pos;
223 }
224
225 /* Parse field precision */
226 if (end-pos && READ_spec(pos) == '.') {
227 ++pos;
228
229 consumed = get_integer(format_spec, &pos, end, &format->precision);
230 if (consumed == -1)
231 /* Overflow error. Exception already set. */
232 return 0;
233
234 /* Not having a precision after a dot is an error. */
235 if (consumed == 0) {
236 PyErr_Format(PyExc_ValueError,
237 "Format specifier missing precision");
238 return 0;
239 }
240
241 }
242
243 /* Finally, parse the type field. */
244
245 if (end-pos > 1) {
Eric V. Smithd25cfe62012-01-19 20:04:28 -0500246 /* More than one char remain, invalid format specifier. */
247 PyErr_Format(PyExc_ValueError, "Invalid format specifier");
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200248 return 0;
249 }
250
251 if (end-pos == 1) {
252 format->type = READ_spec(pos);
253 ++pos;
254 }
255
256 /* Do as much validating as we can, just by looking at the format
257 specifier. Do not take into account what type of formatting
258 we're doing (int, float, string). */
259
260 if (format->thousands_separators) {
261 switch (format->type) {
262 case 'd':
263 case 'e':
264 case 'f':
265 case 'g':
266 case 'E':
267 case 'G':
268 case '%':
269 case 'F':
270 case '\0':
271 /* These are allowed. See PEP 378.*/
272 break;
273 default:
274 invalid_comma_type(format->type);
275 return 0;
276 }
277 }
278
Victor Stinnera4ac6002012-01-21 15:50:49 +0100279 assert (format->align <= 127);
280 assert (format->sign <= 127);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200281 return 1;
282}
283
284/* Calculate the padding needed. */
285static void
286calc_padding(Py_ssize_t nchars, Py_ssize_t width, Py_UCS4 align,
287 Py_ssize_t *n_lpadding, Py_ssize_t *n_rpadding,
288 Py_ssize_t *n_total)
289{
290 if (width >= 0) {
291 if (nchars > width)
292 *n_total = nchars;
293 else
294 *n_total = width;
295 }
296 else {
297 /* not specified, use all of the chars and no more */
298 *n_total = nchars;
299 }
300
301 /* Figure out how much leading space we need, based on the
302 aligning */
303 if (align == '>')
304 *n_lpadding = *n_total - nchars;
305 else if (align == '^')
306 *n_lpadding = (*n_total - nchars) / 2;
307 else if (align == '<' || align == '=')
308 *n_lpadding = 0;
309 else {
310 /* We should never have an unspecified alignment. */
311 *n_lpadding = 0;
312 assert(0);
313 }
314
315 *n_rpadding = *n_total - nchars - *n_lpadding;
316}
317
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200318/* Do the padding, and return a pointer to where the caller-supplied
319 content goes. */
Victor Stinner9ce59bb2013-05-17 00:04:56 +0200320static int
Victor Stinnerd3f08822012-05-29 12:57:52 +0200321fill_padding(_PyUnicodeWriter *writer,
322 Py_ssize_t nchars,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200323 Py_UCS4 fill_char, Py_ssize_t n_lpadding,
324 Py_ssize_t n_rpadding)
325{
Victor Stinnerd3f08822012-05-29 12:57:52 +0200326 Py_ssize_t pos;
327
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200328 /* Pad on left. */
Victor Stinnerd3f08822012-05-29 12:57:52 +0200329 if (n_lpadding) {
330 pos = writer->pos;
331 _PyUnicode_FastFill(writer->buffer, pos, n_lpadding, fill_char);
332 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200333
334 /* Pad on right. */
Victor Stinnerd3f08822012-05-29 12:57:52 +0200335 if (n_rpadding) {
336 pos = writer->pos + nchars + n_lpadding;
337 _PyUnicode_FastFill(writer->buffer, pos, n_rpadding, fill_char);
338 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200339
340 /* Pointer to the user content. */
Victor Stinnerd3f08822012-05-29 12:57:52 +0200341 writer->pos += n_lpadding;
342 return 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200343}
344
345/************************************************************************/
346/*********** common routines for numeric formatting *********************/
347/************************************************************************/
348
349/* Locale type codes. */
350#define LT_CURRENT_LOCALE 0
351#define LT_DEFAULT_LOCALE 1
352#define LT_NO_LOCALE 2
353
354/* Locale info needed for formatting integers and the part of floats
355 before and including the decimal. Note that locales only support
356 8-bit chars, not unicode. */
357typedef struct {
Victor Stinner41a863c2012-02-24 00:37:51 +0100358 PyObject *decimal_point;
359 PyObject *thousands_sep;
360 const char *grouping;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200361} LocaleInfo;
362
Victor Stinner41a863c2012-02-24 00:37:51 +0100363#define STATIC_LOCALE_INFO_INIT {0, 0, 0}
364
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200365/* describes the layout for an integer, see the comment in
366 calc_number_widths() for details */
367typedef struct {
368 Py_ssize_t n_lpadding;
369 Py_ssize_t n_prefix;
370 Py_ssize_t n_spadding;
371 Py_ssize_t n_rpadding;
372 char sign;
373 Py_ssize_t n_sign; /* number of digits needed for sign (0/1) */
374 Py_ssize_t n_grouped_digits; /* Space taken up by the digits, including
375 any grouping chars. */
376 Py_ssize_t n_decimal; /* 0 if only an integer */
377 Py_ssize_t n_remainder; /* Digits in decimal and/or exponent part,
378 excluding the decimal itself, if
379 present. */
380
381 /* These 2 are not the widths of fields, but are needed by
382 STRINGLIB_GROUPING. */
383 Py_ssize_t n_digits; /* The number of digits before a decimal
384 or exponent. */
385 Py_ssize_t n_min_width; /* The min_width we used when we computed
386 the n_grouped_digits width. */
387} NumberFieldWidths;
388
389
390/* Given a number of the form:
391 digits[remainder]
392 where ptr points to the start and end points to the end, find where
393 the integer part ends. This could be a decimal, an exponent, both,
394 or neither.
395 If a decimal point is present, set *has_decimal and increment
396 remainder beyond it.
397 Results are undefined (but shouldn't crash) for improperly
398 formatted strings.
399*/
400static void
401parse_number(PyObject *s, Py_ssize_t pos, Py_ssize_t end,
402 Py_ssize_t *n_remainder, int *has_decimal)
403{
404 Py_ssize_t remainder;
405
Antoine Pitrouc73c5612013-02-09 23:14:42 +0100406 while (pos<end && Py_ISDIGIT(PyUnicode_READ_CHAR(s, pos)))
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200407 ++pos;
408 remainder = pos;
409
410 /* Does remainder start with a decimal point? */
411 *has_decimal = pos<end && PyUnicode_READ_CHAR(s, remainder) == '.';
412
413 /* Skip the decimal point. */
414 if (*has_decimal)
415 remainder++;
416
417 *n_remainder = end - remainder;
418}
419
420/* not all fields of format are used. for example, precision is
421 unused. should this take discrete params in order to be more clear
422 about what it does? or is passing a single format parameter easier
423 and more efficient enough to justify a little obfuscation? */
424static Py_ssize_t
425calc_number_widths(NumberFieldWidths *spec, Py_ssize_t n_prefix,
426 Py_UCS4 sign_char, PyObject *number, Py_ssize_t n_start,
427 Py_ssize_t n_end, Py_ssize_t n_remainder,
428 int has_decimal, const LocaleInfo *locale,
Victor Stinner41a863c2012-02-24 00:37:51 +0100429 const InternalFormatSpec *format, Py_UCS4 *maxchar)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200430{
431 Py_ssize_t n_non_digit_non_padding;
432 Py_ssize_t n_padding;
433
434 spec->n_digits = n_end - n_start - n_remainder - (has_decimal?1:0);
435 spec->n_lpadding = 0;
436 spec->n_prefix = n_prefix;
Victor Stinner41a863c2012-02-24 00:37:51 +0100437 spec->n_decimal = has_decimal ? PyUnicode_GET_LENGTH(locale->decimal_point) : 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200438 spec->n_remainder = n_remainder;
439 spec->n_spadding = 0;
440 spec->n_rpadding = 0;
441 spec->sign = '\0';
442 spec->n_sign = 0;
443
444 /* the output will look like:
445 | |
446 | <lpadding> <sign> <prefix> <spadding> <grouped_digits> <decimal> <remainder> <rpadding> |
447 | |
448
449 sign is computed from format->sign and the actual
450 sign of the number
451
452 prefix is given (it's for the '0x' prefix)
453
454 digits is already known
455
456 the total width is either given, or computed from the
457 actual digits
458
459 only one of lpadding, spadding, and rpadding can be non-zero,
460 and it's calculated from the width and other fields
461 */
462
463 /* compute the various parts we're going to write */
464 switch (format->sign) {
465 case '+':
466 /* always put a + or - */
467 spec->n_sign = 1;
468 spec->sign = (sign_char == '-' ? '-' : '+');
469 break;
470 case ' ':
471 spec->n_sign = 1;
472 spec->sign = (sign_char == '-' ? '-' : ' ');
473 break;
474 default:
475 /* Not specified, or the default (-) */
476 if (sign_char == '-') {
477 spec->n_sign = 1;
478 spec->sign = '-';
479 }
480 }
481
482 /* The number of chars used for non-digits and non-padding. */
483 n_non_digit_non_padding = spec->n_sign + spec->n_prefix + spec->n_decimal +
484 spec->n_remainder;
485
486 /* min_width can go negative, that's okay. format->width == -1 means
487 we don't care. */
488 if (format->fill_char == '0' && format->align == '=')
489 spec->n_min_width = format->width - n_non_digit_non_padding;
490 else
491 spec->n_min_width = 0;
492
493 if (spec->n_digits == 0)
494 /* This case only occurs when using 'c' formatting, we need
495 to special case it because the grouping code always wants
496 to have at least one character. */
497 spec->n_grouped_digits = 0;
Victor Stinner41a863c2012-02-24 00:37:51 +0100498 else {
499 Py_UCS4 grouping_maxchar;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200500 spec->n_grouped_digits = _PyUnicode_InsertThousandsGrouping(
Victor Stinner41a863c2012-02-24 00:37:51 +0100501 NULL, 0,
502 0, NULL,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200503 spec->n_digits, spec->n_min_width,
Victor Stinner41a863c2012-02-24 00:37:51 +0100504 locale->grouping, locale->thousands_sep, &grouping_maxchar);
505 *maxchar = Py_MAX(*maxchar, grouping_maxchar);
506 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200507
508 /* Given the desired width and the total of digit and non-digit
509 space we consume, see if we need any padding. format->width can
510 be negative (meaning no padding), but this code still works in
511 that case. */
512 n_padding = format->width -
513 (n_non_digit_non_padding + spec->n_grouped_digits);
514 if (n_padding > 0) {
515 /* Some padding is needed. Determine if it's left, space, or right. */
516 switch (format->align) {
517 case '<':
518 spec->n_rpadding = n_padding;
519 break;
520 case '^':
521 spec->n_lpadding = n_padding / 2;
522 spec->n_rpadding = n_padding - spec->n_lpadding;
523 break;
524 case '=':
525 spec->n_spadding = n_padding;
526 break;
527 case '>':
528 spec->n_lpadding = n_padding;
529 break;
530 default:
531 /* Shouldn't get here, but treat it as '>' */
532 spec->n_lpadding = n_padding;
533 assert(0);
534 break;
535 }
536 }
Victor Stinner41a863c2012-02-24 00:37:51 +0100537
538 if (spec->n_lpadding || spec->n_spadding || spec->n_rpadding)
539 *maxchar = Py_MAX(*maxchar, format->fill_char);
540
Victor Stinner90f50d42012-02-24 01:44:47 +0100541 if (spec->n_decimal)
542 *maxchar = Py_MAX(*maxchar, PyUnicode_MAX_CHAR_VALUE(locale->decimal_point));
543
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200544 return spec->n_lpadding + spec->n_sign + spec->n_prefix +
545 spec->n_spadding + spec->n_grouped_digits + spec->n_decimal +
546 spec->n_remainder + spec->n_rpadding;
547}
548
549/* Fill in the digit parts of a numbers's string representation,
550 as determined in calc_number_widths().
Victor Stinnerafbaa202011-09-28 21:50:16 +0200551 Return -1 on error, or 0 on success. */
552static int
Victor Stinnerd3f08822012-05-29 12:57:52 +0200553fill_number(_PyUnicodeWriter *writer, const NumberFieldWidths *spec,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200554 PyObject *digits, Py_ssize_t d_start, Py_ssize_t d_end,
Victor Stinnerafbaa202011-09-28 21:50:16 +0200555 PyObject *prefix, Py_ssize_t p_start,
556 Py_UCS4 fill_char,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200557 LocaleInfo *locale, int toupper)
558{
559 /* Used to keep track of digits, decimal, and remainder. */
560 Py_ssize_t d_pos = d_start;
Victor Stinner22c103b2013-05-07 23:50:03 +0200561 const unsigned int kind = writer->kind;
Victor Stinnerd3f08822012-05-29 12:57:52 +0200562 const void *data = writer->data;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200563 Py_ssize_t r;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200564
565 if (spec->n_lpadding) {
Victor Stinnerd3f08822012-05-29 12:57:52 +0200566 _PyUnicode_FastFill(writer->buffer,
567 writer->pos, spec->n_lpadding, fill_char);
568 writer->pos += spec->n_lpadding;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200569 }
570 if (spec->n_sign == 1) {
Victor Stinnerd3f08822012-05-29 12:57:52 +0200571 PyUnicode_WRITE(kind, data, writer->pos, spec->sign);
572 writer->pos++;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200573 }
574 if (spec->n_prefix) {
Victor Stinnerd3f08822012-05-29 12:57:52 +0200575 _PyUnicode_FastCopyCharacters(writer->buffer, writer->pos,
576 prefix, p_start,
577 spec->n_prefix);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200578 if (toupper) {
579 Py_ssize_t t;
Benjamin Peterson21e0da22012-01-11 21:00:42 -0500580 for (t = 0; t < spec->n_prefix; t++) {
Victor Stinnerd3f08822012-05-29 12:57:52 +0200581 Py_UCS4 c = PyUnicode_READ(kind, data, writer->pos + t);
Victor Stinnered277852012-02-01 00:22:23 +0100582 c = Py_TOUPPER(c);
Victor Stinnera4ac6002012-01-21 15:50:49 +0100583 assert (c <= 127);
Victor Stinnerd3f08822012-05-29 12:57:52 +0200584 PyUnicode_WRITE(kind, data, writer->pos + t, c);
Benjamin Peterson21e0da22012-01-11 21:00:42 -0500585 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200586 }
Victor Stinnerd3f08822012-05-29 12:57:52 +0200587 writer->pos += spec->n_prefix;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200588 }
589 if (spec->n_spadding) {
Victor Stinnerd3f08822012-05-29 12:57:52 +0200590 _PyUnicode_FastFill(writer->buffer,
591 writer->pos, spec->n_spadding, fill_char);
592 writer->pos += spec->n_spadding;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200593 }
594
595 /* Only for type 'c' special case, it has no digits. */
596 if (spec->n_digits != 0) {
597 /* Fill the digits with InsertThousandsGrouping. */
Victor Stinnerdba2dee2011-09-28 21:50:42 +0200598 char *pdigits;
599 if (PyUnicode_READY(digits))
600 return -1;
601 pdigits = PyUnicode_DATA(digits);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200602 if (PyUnicode_KIND(digits) < kind) {
603 pdigits = _PyUnicode_AsKind(digits, kind);
Victor Stinnerafbaa202011-09-28 21:50:16 +0200604 if (pdigits == NULL)
605 return -1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200606 }
Victor Stinner90f50d42012-02-24 01:44:47 +0100607 r = _PyUnicode_InsertThousandsGrouping(
Victor Stinnerd3f08822012-05-29 12:57:52 +0200608 writer->buffer, writer->pos,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200609 spec->n_grouped_digits,
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200610 pdigits + kind * d_pos,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200611 spec->n_digits, spec->n_min_width,
Victor Stinner41a863c2012-02-24 00:37:51 +0100612 locale->grouping, locale->thousands_sep, NULL);
Victor Stinner90f50d42012-02-24 01:44:47 +0100613 if (r == -1)
614 return -1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200615 assert(r == spec->n_grouped_digits);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200616 if (PyUnicode_KIND(digits) < kind)
617 PyMem_Free(pdigits);
618 d_pos += spec->n_digits;
619 }
620 if (toupper) {
621 Py_ssize_t t;
Benjamin Peterson21e0da22012-01-11 21:00:42 -0500622 for (t = 0; t < spec->n_grouped_digits; t++) {
Victor Stinnerd3f08822012-05-29 12:57:52 +0200623 Py_UCS4 c = PyUnicode_READ(kind, data, writer->pos + t);
Victor Stinnered277852012-02-01 00:22:23 +0100624 c = Py_TOUPPER(c);
Benjamin Peterson21e0da22012-01-11 21:00:42 -0500625 if (c > 127) {
626 PyErr_SetString(PyExc_SystemError, "non-ascii grouped digit");
627 return -1;
628 }
Victor Stinnerd3f08822012-05-29 12:57:52 +0200629 PyUnicode_WRITE(kind, data, writer->pos + t, c);
Benjamin Peterson21e0da22012-01-11 21:00:42 -0500630 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200631 }
Victor Stinnerd3f08822012-05-29 12:57:52 +0200632 writer->pos += spec->n_grouped_digits;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200633
634 if (spec->n_decimal) {
Victor Stinnerd3f08822012-05-29 12:57:52 +0200635 _PyUnicode_FastCopyCharacters(
636 writer->buffer, writer->pos,
637 locale->decimal_point, 0, spec->n_decimal);
638 writer->pos += spec->n_decimal;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200639 d_pos += 1;
640 }
641
642 if (spec->n_remainder) {
Victor Stinnerd3f08822012-05-29 12:57:52 +0200643 _PyUnicode_FastCopyCharacters(
644 writer->buffer, writer->pos,
645 digits, d_pos, spec->n_remainder);
646 writer->pos += spec->n_remainder;
Brett Cannon8a250fa2012-06-25 16:13:44 -0400647 /* d_pos += spec->n_remainder; */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200648 }
649
650 if (spec->n_rpadding) {
Victor Stinnerd3f08822012-05-29 12:57:52 +0200651 _PyUnicode_FastFill(writer->buffer,
652 writer->pos, spec->n_rpadding,
653 fill_char);
654 writer->pos += spec->n_rpadding;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200655 }
Victor Stinnerafbaa202011-09-28 21:50:16 +0200656 return 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200657}
658
659static char no_grouping[1] = {CHAR_MAX};
660
661/* Find the decimal point character(s?), thousands_separator(s?), and
662 grouping description, either for the current locale if type is
663 LT_CURRENT_LOCALE, a hard-coded locale if LT_DEFAULT_LOCALE, or
664 none if LT_NO_LOCALE. */
Victor Stinner41a863c2012-02-24 00:37:51 +0100665static int
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200666get_locale_info(int type, LocaleInfo *locale_info)
667{
668 switch (type) {
669 case LT_CURRENT_LOCALE: {
670 struct lconv *locale_data = localeconv();
Victor Stinner41a863c2012-02-24 00:37:51 +0100671 locale_info->decimal_point = PyUnicode_DecodeLocale(
672 locale_data->decimal_point,
673 NULL);
674 if (locale_info->decimal_point == NULL)
675 return -1;
676 locale_info->thousands_sep = PyUnicode_DecodeLocale(
677 locale_data->thousands_sep,
678 NULL);
679 if (locale_info->thousands_sep == NULL) {
680 Py_DECREF(locale_info->decimal_point);
681 return -1;
682 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200683 locale_info->grouping = locale_data->grouping;
684 break;
685 }
686 case LT_DEFAULT_LOCALE:
Victor Stinner41a863c2012-02-24 00:37:51 +0100687 locale_info->decimal_point = PyUnicode_FromOrdinal('.');
688 locale_info->thousands_sep = PyUnicode_FromOrdinal(',');
689 if (!locale_info->decimal_point || !locale_info->thousands_sep) {
690 Py_XDECREF(locale_info->decimal_point);
691 Py_XDECREF(locale_info->thousands_sep);
692 return -1;
693 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200694 locale_info->grouping = "\3"; /* Group every 3 characters. The
695 (implicit) trailing 0 means repeat
696 infinitely. */
697 break;
698 case LT_NO_LOCALE:
Victor Stinner41a863c2012-02-24 00:37:51 +0100699 locale_info->decimal_point = PyUnicode_FromOrdinal('.');
700 locale_info->thousands_sep = PyUnicode_New(0, 0);
701 if (!locale_info->decimal_point || !locale_info->thousands_sep) {
702 Py_XDECREF(locale_info->decimal_point);
703 Py_XDECREF(locale_info->thousands_sep);
704 return -1;
705 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200706 locale_info->grouping = no_grouping;
707 break;
708 default:
709 assert(0);
710 }
Victor Stinner41a863c2012-02-24 00:37:51 +0100711 return 0;
712}
713
714static void
715free_locale_info(LocaleInfo *locale_info)
716{
717 Py_XDECREF(locale_info->decimal_point);
718 Py_XDECREF(locale_info->thousands_sep);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200719}
720
721/************************************************************************/
722/*********** string formatting ******************************************/
723/************************************************************************/
724
Victor Stinnerd3f08822012-05-29 12:57:52 +0200725static int
726format_string_internal(PyObject *value, const InternalFormatSpec *format,
727 _PyUnicodeWriter *writer)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200728{
729 Py_ssize_t lpad;
730 Py_ssize_t rpad;
731 Py_ssize_t total;
Victor Stinnerd3f08822012-05-29 12:57:52 +0200732 Py_ssize_t len;
733 int result = -1;
Victor Stinnerece58de2012-04-23 23:36:38 +0200734 Py_UCS4 maxchar;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200735
Victor Stinnerd3f08822012-05-29 12:57:52 +0200736 assert(PyUnicode_IS_READY(value));
737 len = PyUnicode_GET_LENGTH(value);
738
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200739 /* sign is not allowed on strings */
740 if (format->sign != '\0') {
741 PyErr_SetString(PyExc_ValueError,
742 "Sign not allowed in string format specifier");
743 goto done;
744 }
745
746 /* alternate is not allowed on strings */
747 if (format->alternate) {
748 PyErr_SetString(PyExc_ValueError,
749 "Alternate form (#) not allowed in string format "
750 "specifier");
751 goto done;
752 }
753
754 /* '=' alignment not allowed on strings */
755 if (format->align == '=') {
756 PyErr_SetString(PyExc_ValueError,
757 "'=' alignment not allowed "
758 "in string format specifier");
759 goto done;
760 }
761
Victor Stinner621ef3d2012-10-02 00:33:47 +0200762 if ((format->width == -1 || format->width <= len)
763 && (format->precision == -1 || format->precision >= len)) {
Victor Stinnerd3f08822012-05-29 12:57:52 +0200764 /* Fast path */
765 return _PyUnicodeWriter_WriteStr(writer, value);
766 }
767
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200768 /* if precision is specified, output no more that format.precision
769 characters */
770 if (format->precision >= 0 && len >= format->precision) {
771 len = format->precision;
772 }
773
774 calc_padding(len, format->width, format->align, &lpad, &rpad, &total);
775
Victor Stinnereb4b5ac2013-04-03 02:02:33 +0200776 maxchar = writer->maxchar;
Victor Stinnera4ac6002012-01-21 15:50:49 +0100777 if (lpad != 0 || rpad != 0)
778 maxchar = Py_MAX(maxchar, format->fill_char);
Victor Stinnereb4b5ac2013-04-03 02:02:33 +0200779 if (PyUnicode_MAX_CHAR_VALUE(value) > maxchar) {
780 Py_UCS4 valmaxchar = _PyUnicode_FindMaxChar(value, 0, len);
781 maxchar = Py_MAX(maxchar, valmaxchar);
782 }
Victor Stinnera4ac6002012-01-21 15:50:49 +0100783
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200784 /* allocate the resulting string */
Victor Stinnerd3f08822012-05-29 12:57:52 +0200785 if (_PyUnicodeWriter_Prepare(writer, total, maxchar) == -1)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200786 goto done;
787
788 /* Write into that space. First the padding. */
Eric V. Smith2ea97122014-04-14 11:55:10 -0400789 result = fill_padding(writer, len, format->fill_char, lpad, rpad);
Victor Stinnerd3f08822012-05-29 12:57:52 +0200790 if (result == -1)
791 goto done;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200792
793 /* Then the source string. */
Victor Stinnerc9d369f2012-06-16 02:22:37 +0200794 if (len) {
795 _PyUnicode_FastCopyCharacters(writer->buffer, writer->pos,
796 value, 0, len);
797 }
Victor Stinnerd3f08822012-05-29 12:57:52 +0200798 writer->pos += (len + rpad);
799 result = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200800
801done:
802 return result;
803}
804
805
806/************************************************************************/
807/*********** long formatting ********************************************/
808/************************************************************************/
809
Victor Stinnerd3f08822012-05-29 12:57:52 +0200810static int
811format_long_internal(PyObject *value, const InternalFormatSpec *format,
812 _PyUnicodeWriter *writer)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200813{
Victor Stinnerd3f08822012-05-29 12:57:52 +0200814 int result = -1;
Amaury Forgeot d'Arccd27df32012-01-23 22:42:19 +0100815 Py_UCS4 maxchar = 127;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200816 PyObject *tmp = NULL;
817 Py_ssize_t inumeric_chars;
818 Py_UCS4 sign_char = '\0';
819 Py_ssize_t n_digits; /* count of digits need from the computed
820 string */
821 Py_ssize_t n_remainder = 0; /* Used only for 'c' formatting, which
822 produces non-digits */
823 Py_ssize_t n_prefix = 0; /* Count of prefix chars, (e.g., '0x') */
824 Py_ssize_t n_total;
Victor Stinnered277852012-02-01 00:22:23 +0100825 Py_ssize_t prefix = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200826 NumberFieldWidths spec;
827 long x;
828
829 /* Locale settings, either from the actual locale or
830 from a hard-code pseudo-locale */
Victor Stinner41a863c2012-02-24 00:37:51 +0100831 LocaleInfo locale = STATIC_LOCALE_INFO_INIT;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200832
833 /* no precision allowed on integers */
834 if (format->precision != -1) {
835 PyErr_SetString(PyExc_ValueError,
836 "Precision not allowed in integer format specifier");
837 goto done;
838 }
839
840 /* special case for character formatting */
841 if (format->type == 'c') {
842 /* error to specify a sign */
843 if (format->sign != '\0') {
844 PyErr_SetString(PyExc_ValueError,
845 "Sign not allowed with integer"
846 " format specifier 'c'");
847 goto done;
848 }
849
850 /* taken from unicodeobject.c formatchar() */
851 /* Integer input truncated to a character */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200852 x = PyLong_AsLong(value);
853 if (x == -1 && PyErr_Occurred())
854 goto done;
855 if (x < 0 || x > 0x10ffff) {
856 PyErr_SetString(PyExc_OverflowError,
Victor Stinnera4ac6002012-01-21 15:50:49 +0100857 "%c arg not in range(0x110000)");
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200858 goto done;
859 }
860 tmp = PyUnicode_FromOrdinal(x);
861 inumeric_chars = 0;
862 n_digits = 1;
Amaury Forgeot d'Arc6d766fc2012-01-23 23:20:43 +0100863 maxchar = Py_MAX(maxchar, (Py_UCS4)x);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200864
865 /* As a sort-of hack, we tell calc_number_widths that we only
866 have "remainder" characters. calc_number_widths thinks
867 these are characters that don't get formatted, only copied
868 into the output string. We do this for 'c' formatting,
869 because the characters are likely to be non-digits. */
870 n_remainder = 1;
871 }
872 else {
873 int base;
874 int leading_chars_to_skip = 0; /* Number of characters added by
875 PyNumber_ToBase that we want to
876 skip over. */
877
878 /* Compute the base and how many characters will be added by
879 PyNumber_ToBase */
880 switch (format->type) {
881 case 'b':
882 base = 2;
883 leading_chars_to_skip = 2; /* 0b */
884 break;
885 case 'o':
886 base = 8;
887 leading_chars_to_skip = 2; /* 0o */
888 break;
889 case 'x':
890 case 'X':
891 base = 16;
892 leading_chars_to_skip = 2; /* 0x */
893 break;
894 default: /* shouldn't be needed, but stops a compiler warning */
895 case 'd':
896 case 'n':
897 base = 10;
898 break;
899 }
900
Victor Stinnerd3f08822012-05-29 12:57:52 +0200901 if (format->sign != '+' && format->sign != ' '
902 && format->width == -1
903 && format->type != 'X' && format->type != 'n'
904 && !format->thousands_separators
905 && PyLong_CheckExact(value))
906 {
907 /* Fast path */
908 return _PyLong_FormatWriter(writer, value, base, format->alternate);
909 }
910
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200911 /* The number of prefix chars is the same as the leading
912 chars to skip */
913 if (format->alternate)
914 n_prefix = leading_chars_to_skip;
915
916 /* Do the hard part, converting to a string in a given base */
Victor Stinnerd3f08822012-05-29 12:57:52 +0200917 tmp = _PyLong_Format(value, base);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200918 if (tmp == NULL || PyUnicode_READY(tmp) == -1)
919 goto done;
920
921 inumeric_chars = 0;
922 n_digits = PyUnicode_GET_LENGTH(tmp);
923
924 prefix = inumeric_chars;
925
926 /* Is a sign character present in the output? If so, remember it
927 and skip it */
928 if (PyUnicode_READ_CHAR(tmp, inumeric_chars) == '-') {
929 sign_char = '-';
930 ++prefix;
931 ++leading_chars_to_skip;
932 }
933
934 /* Skip over the leading chars (0x, 0b, etc.) */
935 n_digits -= leading_chars_to_skip;
936 inumeric_chars += leading_chars_to_skip;
937 }
938
939 /* Determine the grouping, separator, and decimal point, if any. */
Victor Stinner41a863c2012-02-24 00:37:51 +0100940 if (get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
941 (format->thousands_separators ?
942 LT_DEFAULT_LOCALE :
943 LT_NO_LOCALE),
944 &locale) == -1)
945 goto done;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200946
947 /* Calculate how much memory we'll need. */
948 n_total = calc_number_widths(&spec, n_prefix, sign_char, tmp, inumeric_chars,
Victor Stinner41a863c2012-02-24 00:37:51 +0100949 inumeric_chars + n_digits, n_remainder, 0,
950 &locale, format, &maxchar);
Victor Stinnera4ac6002012-01-21 15:50:49 +0100951
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200952 /* Allocate the memory. */
Victor Stinnerd3f08822012-05-29 12:57:52 +0200953 if (_PyUnicodeWriter_Prepare(writer, n_total, maxchar) == -1)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200954 goto done;
955
956 /* Populate the memory. */
Victor Stinnerd3f08822012-05-29 12:57:52 +0200957 result = fill_number(writer, &spec,
958 tmp, inumeric_chars, inumeric_chars + n_digits,
Eric V. Smith2ea97122014-04-14 11:55:10 -0400959 tmp, prefix, format->fill_char,
Victor Stinnerd3f08822012-05-29 12:57:52 +0200960 &locale, format->type == 'X');
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200961
962done:
963 Py_XDECREF(tmp);
Victor Stinner41a863c2012-02-24 00:37:51 +0100964 free_locale_info(&locale);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200965 return result;
966}
967
968/************************************************************************/
969/*********** float formatting *******************************************/
970/************************************************************************/
971
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200972/* much of this is taken from unicodeobject.c */
Victor Stinnerd3f08822012-05-29 12:57:52 +0200973static int
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200974format_float_internal(PyObject *value,
Victor Stinnerd3f08822012-05-29 12:57:52 +0200975 const InternalFormatSpec *format,
976 _PyUnicodeWriter *writer)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200977{
978 char *buf = NULL; /* buffer returned from PyOS_double_to_string */
979 Py_ssize_t n_digits;
980 Py_ssize_t n_remainder;
981 Py_ssize_t n_total;
982 int has_decimal;
983 double val;
Victor Stinner76d38502013-06-24 23:34:15 +0200984 int precision, default_precision = 6;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200985 Py_UCS4 type = format->type;
986 int add_pct = 0;
987 Py_ssize_t index;
988 NumberFieldWidths spec;
989 int flags = 0;
Victor Stinnerd3f08822012-05-29 12:57:52 +0200990 int result = -1;
Amaury Forgeot d'Arccd27df32012-01-23 22:42:19 +0100991 Py_UCS4 maxchar = 127;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200992 Py_UCS4 sign_char = '\0';
993 int float_type; /* Used to see if we have a nan, inf, or regular float. */
994 PyObject *unicode_tmp = NULL;
995
996 /* Locale settings, either from the actual locale or
997 from a hard-code pseudo-locale */
Victor Stinner41a863c2012-02-24 00:37:51 +0100998 LocaleInfo locale = STATIC_LOCALE_INFO_INIT;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200999
Victor Stinner2f084ec2013-06-23 14:54:30 +02001000 if (format->precision > INT_MAX) {
1001 PyErr_SetString(PyExc_ValueError, "precision too big");
1002 goto done;
1003 }
1004 precision = (int)format->precision;
1005
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001006 if (format->alternate)
1007 flags |= Py_DTSF_ALT;
1008
1009 if (type == '\0') {
1010 /* Omitted type specifier. Behaves in the same way as repr(x)
1011 and str(x) if no precision is given, else like 'g', but with
1012 at least one digit after the decimal point. */
1013 flags |= Py_DTSF_ADD_DOT_0;
1014 type = 'r';
1015 default_precision = 0;
1016 }
1017
1018 if (type == 'n')
1019 /* 'n' is the same as 'g', except for the locale used to
1020 format the result. We take care of that later. */
1021 type = 'g';
1022
1023 val = PyFloat_AsDouble(value);
1024 if (val == -1.0 && PyErr_Occurred())
1025 goto done;
1026
1027 if (type == '%') {
1028 type = 'f';
1029 val *= 100;
1030 add_pct = 1;
1031 }
1032
1033 if (precision < 0)
1034 precision = default_precision;
1035 else if (type == 'r')
1036 type = 'g';
1037
1038 /* Cast "type", because if we're in unicode we need to pass a
1039 8-bit char. This is safe, because we've restricted what "type"
1040 can be. */
1041 buf = PyOS_double_to_string(val, (char)type, precision, flags,
1042 &float_type);
1043 if (buf == NULL)
1044 goto done;
1045 n_digits = strlen(buf);
1046
1047 if (add_pct) {
1048 /* We know that buf has a trailing zero (since we just called
1049 strlen() on it), and we don't use that fact any more. So we
1050 can just write over the trailing zero. */
1051 buf[n_digits] = '%';
1052 n_digits += 1;
1053 }
1054
Victor Stinnerd3f08822012-05-29 12:57:52 +02001055 if (format->sign != '+' && format->sign != ' '
1056 && format->width == -1
1057 && format->type != 'n'
1058 && !format->thousands_separators)
1059 {
1060 /* Fast path */
Victor Stinner4a587072013-11-19 12:54:53 +01001061 result = _PyUnicodeWriter_WriteASCIIString(writer, buf, n_digits);
1062 PyMem_Free(buf);
Victor Stinnerd3f08822012-05-29 12:57:52 +02001063 return result;
1064 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001065
Victor Stinner4a587072013-11-19 12:54:53 +01001066 /* Since there is no unicode version of PyOS_double_to_string,
1067 just use the 8 bit version and then convert to unicode. */
1068 unicode_tmp = _PyUnicode_FromASCII(buf, n_digits);
1069 PyMem_Free(buf);
1070 if (unicode_tmp == NULL)
1071 goto done;
1072
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001073 /* Is a sign character present in the output? If so, remember it
1074 and skip it */
Victor Stinnerd3f08822012-05-29 12:57:52 +02001075 index = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001076 if (PyUnicode_READ_CHAR(unicode_tmp, index) == '-') {
1077 sign_char = '-';
1078 ++index;
1079 --n_digits;
1080 }
1081
1082 /* Determine if we have any "remainder" (after the digits, might include
1083 decimal or exponent or both (or neither)) */
1084 parse_number(unicode_tmp, index, index + n_digits, &n_remainder, &has_decimal);
1085
1086 /* Determine the grouping, separator, and decimal point, if any. */
Victor Stinner41a863c2012-02-24 00:37:51 +01001087 if (get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
1088 (format->thousands_separators ?
1089 LT_DEFAULT_LOCALE :
1090 LT_NO_LOCALE),
1091 &locale) == -1)
1092 goto done;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001093
1094 /* Calculate how much memory we'll need. */
Victor Stinnerafbaa202011-09-28 21:50:16 +02001095 n_total = calc_number_widths(&spec, 0, sign_char, unicode_tmp, index,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001096 index + n_digits, n_remainder, has_decimal,
Victor Stinner41a863c2012-02-24 00:37:51 +01001097 &locale, format, &maxchar);
Victor Stinnera4ac6002012-01-21 15:50:49 +01001098
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001099 /* Allocate the memory. */
Victor Stinnerd3f08822012-05-29 12:57:52 +02001100 if (_PyUnicodeWriter_Prepare(writer, n_total, maxchar) == -1)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001101 goto done;
1102
1103 /* Populate the memory. */
Victor Stinnerd3f08822012-05-29 12:57:52 +02001104 result = fill_number(writer, &spec,
1105 unicode_tmp, index, index + n_digits,
Eric V. Smith2ea97122014-04-14 11:55:10 -04001106 NULL, 0, format->fill_char,
Victor Stinnerd3f08822012-05-29 12:57:52 +02001107 &locale, 0);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001108
1109done:
Stefan Krahd9c1bf72012-09-06 13:02:46 +02001110 Py_XDECREF(unicode_tmp);
Victor Stinner41a863c2012-02-24 00:37:51 +01001111 free_locale_info(&locale);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001112 return result;
1113}
1114
1115/************************************************************************/
1116/*********** complex formatting *****************************************/
1117/************************************************************************/
1118
Victor Stinnerd3f08822012-05-29 12:57:52 +02001119static int
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001120format_complex_internal(PyObject *value,
Victor Stinnerd3f08822012-05-29 12:57:52 +02001121 const InternalFormatSpec *format,
1122 _PyUnicodeWriter *writer)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001123{
1124 double re;
1125 double im;
1126 char *re_buf = NULL; /* buffer returned from PyOS_double_to_string */
1127 char *im_buf = NULL; /* buffer returned from PyOS_double_to_string */
1128
1129 InternalFormatSpec tmp_format = *format;
1130 Py_ssize_t n_re_digits;
1131 Py_ssize_t n_im_digits;
1132 Py_ssize_t n_re_remainder;
1133 Py_ssize_t n_im_remainder;
1134 Py_ssize_t n_re_total;
1135 Py_ssize_t n_im_total;
1136 int re_has_decimal;
1137 int im_has_decimal;
Victor Stinner76d38502013-06-24 23:34:15 +02001138 int precision, default_precision = 6;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001139 Py_UCS4 type = format->type;
1140 Py_ssize_t i_re;
1141 Py_ssize_t i_im;
1142 NumberFieldWidths re_spec;
1143 NumberFieldWidths im_spec;
1144 int flags = 0;
Victor Stinnerd3f08822012-05-29 12:57:52 +02001145 int result = -1;
Amaury Forgeot d'Arccd27df32012-01-23 22:42:19 +01001146 Py_UCS4 maxchar = 127;
Victor Stinnerd3f08822012-05-29 12:57:52 +02001147 enum PyUnicode_Kind rkind;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001148 void *rdata;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001149 Py_UCS4 re_sign_char = '\0';
1150 Py_UCS4 im_sign_char = '\0';
1151 int re_float_type; /* Used to see if we have a nan, inf, or regular float. */
1152 int im_float_type;
1153 int add_parens = 0;
1154 int skip_re = 0;
1155 Py_ssize_t lpad;
1156 Py_ssize_t rpad;
1157 Py_ssize_t total;
1158 PyObject *re_unicode_tmp = NULL;
1159 PyObject *im_unicode_tmp = NULL;
1160
1161 /* Locale settings, either from the actual locale or
1162 from a hard-code pseudo-locale */
Victor Stinner41a863c2012-02-24 00:37:51 +01001163 LocaleInfo locale = STATIC_LOCALE_INFO_INIT;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001164
Victor Stinner2f084ec2013-06-23 14:54:30 +02001165 if (format->precision > INT_MAX) {
1166 PyErr_SetString(PyExc_ValueError, "precision too big");
1167 goto done;
1168 }
1169 precision = (int)format->precision;
1170
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001171 /* Zero padding is not allowed. */
1172 if (format->fill_char == '0') {
1173 PyErr_SetString(PyExc_ValueError,
1174 "Zero padding is not allowed in complex format "
1175 "specifier");
1176 goto done;
1177 }
1178
1179 /* Neither is '=' alignment . */
1180 if (format->align == '=') {
1181 PyErr_SetString(PyExc_ValueError,
1182 "'=' alignment flag is not allowed in complex format "
1183 "specifier");
1184 goto done;
1185 }
1186
1187 re = PyComplex_RealAsDouble(value);
1188 if (re == -1.0 && PyErr_Occurred())
1189 goto done;
1190 im = PyComplex_ImagAsDouble(value);
1191 if (im == -1.0 && PyErr_Occurred())
1192 goto done;
1193
1194 if (format->alternate)
1195 flags |= Py_DTSF_ALT;
1196
1197 if (type == '\0') {
1198 /* Omitted type specifier. Should be like str(self). */
1199 type = 'r';
1200 default_precision = 0;
1201 if (re == 0.0 && copysign(1.0, re) == 1.0)
1202 skip_re = 1;
1203 else
1204 add_parens = 1;
1205 }
1206
1207 if (type == 'n')
1208 /* 'n' is the same as 'g', except for the locale used to
1209 format the result. We take care of that later. */
1210 type = 'g';
1211
1212 if (precision < 0)
1213 precision = default_precision;
1214 else if (type == 'r')
1215 type = 'g';
1216
1217 /* Cast "type", because if we're in unicode we need to pass a
1218 8-bit char. This is safe, because we've restricted what "type"
1219 can be. */
1220 re_buf = PyOS_double_to_string(re, (char)type, precision, flags,
1221 &re_float_type);
1222 if (re_buf == NULL)
1223 goto done;
1224 im_buf = PyOS_double_to_string(im, (char)type, precision, flags,
1225 &im_float_type);
1226 if (im_buf == NULL)
1227 goto done;
1228
1229 n_re_digits = strlen(re_buf);
1230 n_im_digits = strlen(im_buf);
1231
1232 /* Since there is no unicode version of PyOS_double_to_string,
1233 just use the 8 bit version and then convert to unicode. */
Victor Stinnerd3f08822012-05-29 12:57:52 +02001234 re_unicode_tmp = _PyUnicode_FromASCII(re_buf, n_re_digits);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001235 if (re_unicode_tmp == NULL)
1236 goto done;
1237 i_re = 0;
1238
Victor Stinnerd3f08822012-05-29 12:57:52 +02001239 im_unicode_tmp = _PyUnicode_FromASCII(im_buf, n_im_digits);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001240 if (im_unicode_tmp == NULL)
1241 goto done;
1242 i_im = 0;
1243
1244 /* Is a sign character present in the output? If so, remember it
1245 and skip it */
1246 if (PyUnicode_READ_CHAR(re_unicode_tmp, i_re) == '-') {
1247 re_sign_char = '-';
1248 ++i_re;
1249 --n_re_digits;
1250 }
1251 if (PyUnicode_READ_CHAR(im_unicode_tmp, i_im) == '-') {
1252 im_sign_char = '-';
1253 ++i_im;
1254 --n_im_digits;
1255 }
1256
1257 /* Determine if we have any "remainder" (after the digits, might include
1258 decimal or exponent or both (or neither)) */
Victor Stinnerafbaa202011-09-28 21:50:16 +02001259 parse_number(re_unicode_tmp, i_re, i_re + n_re_digits,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001260 &n_re_remainder, &re_has_decimal);
Victor Stinnerafbaa202011-09-28 21:50:16 +02001261 parse_number(im_unicode_tmp, i_im, i_im + n_im_digits,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001262 &n_im_remainder, &im_has_decimal);
1263
1264 /* Determine the grouping, separator, and decimal point, if any. */
Victor Stinner41a863c2012-02-24 00:37:51 +01001265 if (get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
1266 (format->thousands_separators ?
1267 LT_DEFAULT_LOCALE :
1268 LT_NO_LOCALE),
1269 &locale) == -1)
1270 goto done;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001271
1272 /* Turn off any padding. We'll do it later after we've composed
1273 the numbers without padding. */
1274 tmp_format.fill_char = '\0';
1275 tmp_format.align = '<';
1276 tmp_format.width = -1;
1277
1278 /* Calculate how much memory we'll need. */
1279 n_re_total = calc_number_widths(&re_spec, 0, re_sign_char, re_unicode_tmp,
1280 i_re, i_re + n_re_digits, n_re_remainder,
Victor Stinner41a863c2012-02-24 00:37:51 +01001281 re_has_decimal, &locale, &tmp_format,
1282 &maxchar);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001283
1284 /* Same formatting, but always include a sign, unless the real part is
1285 * going to be omitted, in which case we use whatever sign convention was
1286 * requested by the original format. */
1287 if (!skip_re)
1288 tmp_format.sign = '+';
1289 n_im_total = calc_number_widths(&im_spec, 0, im_sign_char, im_unicode_tmp,
1290 i_im, i_im + n_im_digits, n_im_remainder,
Victor Stinner41a863c2012-02-24 00:37:51 +01001291 im_has_decimal, &locale, &tmp_format,
1292 &maxchar);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001293
1294 if (skip_re)
1295 n_re_total = 0;
1296
1297 /* Add 1 for the 'j', and optionally 2 for parens. */
1298 calc_padding(n_re_total + n_im_total + 1 + add_parens * 2,
1299 format->width, format->align, &lpad, &rpad, &total);
1300
Victor Stinner41a863c2012-02-24 00:37:51 +01001301 if (lpad || rpad)
Victor Stinnera4ac6002012-01-21 15:50:49 +01001302 maxchar = Py_MAX(maxchar, format->fill_char);
1303
Victor Stinnerd3f08822012-05-29 12:57:52 +02001304 if (_PyUnicodeWriter_Prepare(writer, total, maxchar) == -1)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001305 goto done;
Victor Stinnerd3f08822012-05-29 12:57:52 +02001306 rkind = writer->kind;
1307 rdata = writer->data;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001308
1309 /* Populate the memory. First, the padding. */
Victor Stinnerd3f08822012-05-29 12:57:52 +02001310 result = fill_padding(writer,
1311 n_re_total + n_im_total + 1 + add_parens * 2,
Eric V. Smith2ea97122014-04-14 11:55:10 -04001312 format->fill_char, lpad, rpad);
Victor Stinnerd3f08822012-05-29 12:57:52 +02001313 if (result == -1)
1314 goto done;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001315
Victor Stinnerd3f08822012-05-29 12:57:52 +02001316 if (add_parens) {
1317 PyUnicode_WRITE(rkind, rdata, writer->pos, '(');
1318 writer->pos++;
1319 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001320
1321 if (!skip_re) {
Victor Stinnerd3f08822012-05-29 12:57:52 +02001322 result = fill_number(writer, &re_spec,
1323 re_unicode_tmp, i_re, i_re + n_re_digits,
1324 NULL, 0,
1325 0,
1326 &locale, 0);
1327 if (result == -1)
Victor Stinnerafbaa202011-09-28 21:50:16 +02001328 goto done;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001329 }
Victor Stinnerd3f08822012-05-29 12:57:52 +02001330 result = fill_number(writer, &im_spec,
1331 im_unicode_tmp, i_im, i_im + n_im_digits,
1332 NULL, 0,
1333 0,
1334 &locale, 0);
1335 if (result == -1)
Victor Stinnerafbaa202011-09-28 21:50:16 +02001336 goto done;
Victor Stinnerd3f08822012-05-29 12:57:52 +02001337 PyUnicode_WRITE(rkind, rdata, writer->pos, 'j');
1338 writer->pos++;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001339
Victor Stinnerd3f08822012-05-29 12:57:52 +02001340 if (add_parens) {
1341 PyUnicode_WRITE(rkind, rdata, writer->pos, ')');
1342 writer->pos++;
1343 }
1344
1345 writer->pos += rpad;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001346
1347done:
1348 PyMem_Free(re_buf);
1349 PyMem_Free(im_buf);
1350 Py_XDECREF(re_unicode_tmp);
1351 Py_XDECREF(im_unicode_tmp);
Victor Stinner41a863c2012-02-24 00:37:51 +01001352 free_locale_info(&locale);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001353 return result;
1354}
1355
1356/************************************************************************/
1357/*********** built in formatters ****************************************/
1358/************************************************************************/
doko@ubuntu.com39378f72012-06-21 12:12:20 +02001359static int
Victor Stinnerd3f08822012-05-29 12:57:52 +02001360format_obj(PyObject *obj, _PyUnicodeWriter *writer)
1361{
1362 PyObject *str;
1363 int err;
1364
1365 str = PyObject_Str(obj);
1366 if (str == NULL)
1367 return -1;
1368 err = _PyUnicodeWriter_WriteStr(writer, str);
1369 Py_DECREF(str);
1370 return err;
1371}
1372
1373int
1374_PyUnicode_FormatAdvancedWriter(_PyUnicodeWriter *writer,
1375 PyObject *obj,
1376 PyObject *format_spec,
1377 Py_ssize_t start, Py_ssize_t end)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001378{
1379 InternalFormatSpec format;
Victor Stinnerd3f08822012-05-29 12:57:52 +02001380
1381 assert(PyUnicode_Check(obj));
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001382
1383 /* check for the special case of zero length format spec, make
1384 it equivalent to str(obj) */
Victor Stinnerd3f08822012-05-29 12:57:52 +02001385 if (start == end) {
1386 if (PyUnicode_CheckExact(obj))
1387 return _PyUnicodeWriter_WriteStr(writer, obj);
1388 else
1389 return format_obj(obj, writer);
1390 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001391
1392 /* parse the format_spec */
1393 if (!parse_internal_render_format_spec(format_spec, start, end,
1394 &format, 's', '<'))
Victor Stinnerd3f08822012-05-29 12:57:52 +02001395 return -1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001396
1397 /* type conversion? */
1398 switch (format.type) {
1399 case 's':
1400 /* no type conversion needed, already a string. do the formatting */
Victor Stinnerd3f08822012-05-29 12:57:52 +02001401 return format_string_internal(obj, &format, writer);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001402 default:
1403 /* unknown */
1404 unknown_presentation_type(format.type, obj->ob_type->tp_name);
Victor Stinnerd3f08822012-05-29 12:57:52 +02001405 return -1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001406 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001407}
1408
Victor Stinnerd3f08822012-05-29 12:57:52 +02001409int
1410_PyLong_FormatAdvancedWriter(_PyUnicodeWriter *writer,
1411 PyObject *obj,
1412 PyObject *format_spec,
1413 Py_ssize_t start, Py_ssize_t end)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001414{
Victor Stinnerd3f08822012-05-29 12:57:52 +02001415 PyObject *tmp = NULL, *str = NULL;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001416 InternalFormatSpec format;
Victor Stinnerd3f08822012-05-29 12:57:52 +02001417 int result = -1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001418
1419 /* check for the special case of zero length format spec, make
1420 it equivalent to str(obj) */
1421 if (start == end) {
Victor Stinnerd3f08822012-05-29 12:57:52 +02001422 if (PyLong_CheckExact(obj))
1423 return _PyLong_FormatWriter(writer, obj, 10, 0);
1424 else
1425 return format_obj(obj, writer);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001426 }
1427
1428 /* parse the format_spec */
1429 if (!parse_internal_render_format_spec(format_spec, start, end,
1430 &format, 'd', '>'))
1431 goto done;
1432
1433 /* type conversion? */
1434 switch (format.type) {
1435 case 'b':
1436 case 'c':
1437 case 'd':
1438 case 'o':
1439 case 'x':
1440 case 'X':
1441 case 'n':
Serhiy Storchaka95949422013-08-27 19:40:23 +03001442 /* no type conversion needed, already an int. do the formatting */
Victor Stinnerd3f08822012-05-29 12:57:52 +02001443 result = format_long_internal(obj, &format, writer);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001444 break;
1445
1446 case 'e':
1447 case 'E':
1448 case 'f':
1449 case 'F':
1450 case 'g':
1451 case 'G':
1452 case '%':
1453 /* convert to float */
1454 tmp = PyNumber_Float(obj);
1455 if (tmp == NULL)
1456 goto done;
Victor Stinnerd3f08822012-05-29 12:57:52 +02001457 result = format_float_internal(tmp, &format, writer);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001458 break;
1459
1460 default:
1461 /* unknown */
1462 unknown_presentation_type(format.type, obj->ob_type->tp_name);
1463 goto done;
1464 }
1465
1466done:
1467 Py_XDECREF(tmp);
Victor Stinnerd3f08822012-05-29 12:57:52 +02001468 Py_XDECREF(str);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001469 return result;
1470}
1471
Victor Stinnerd3f08822012-05-29 12:57:52 +02001472int
1473_PyFloat_FormatAdvancedWriter(_PyUnicodeWriter *writer,
1474 PyObject *obj,
1475 PyObject *format_spec,
1476 Py_ssize_t start, Py_ssize_t end)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001477{
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001478 InternalFormatSpec format;
1479
1480 /* check for the special case of zero length format spec, make
1481 it equivalent to str(obj) */
Victor Stinnerd3f08822012-05-29 12:57:52 +02001482 if (start == end)
1483 return format_obj(obj, writer);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001484
1485 /* parse the format_spec */
1486 if (!parse_internal_render_format_spec(format_spec, start, end,
1487 &format, '\0', '>'))
Victor Stinnerd3f08822012-05-29 12:57:52 +02001488 return -1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001489
1490 /* type conversion? */
1491 switch (format.type) {
1492 case '\0': /* No format code: like 'g', but with at least one decimal. */
1493 case 'e':
1494 case 'E':
1495 case 'f':
1496 case 'F':
1497 case 'g':
1498 case 'G':
1499 case 'n':
1500 case '%':
1501 /* no conversion, already a float. do the formatting */
Victor Stinnerd3f08822012-05-29 12:57:52 +02001502 return format_float_internal(obj, &format, writer);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001503
1504 default:
1505 /* unknown */
1506 unknown_presentation_type(format.type, obj->ob_type->tp_name);
Victor Stinnerd3f08822012-05-29 12:57:52 +02001507 return -1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001508 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001509}
1510
Victor Stinnerd3f08822012-05-29 12:57:52 +02001511int
1512_PyComplex_FormatAdvancedWriter(_PyUnicodeWriter *writer,
1513 PyObject *obj,
1514 PyObject *format_spec,
1515 Py_ssize_t start, Py_ssize_t end)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001516{
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001517 InternalFormatSpec format;
1518
1519 /* check for the special case of zero length format spec, make
1520 it equivalent to str(obj) */
Victor Stinnerd3f08822012-05-29 12:57:52 +02001521 if (start == end)
1522 return format_obj(obj, writer);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001523
1524 /* parse the format_spec */
1525 if (!parse_internal_render_format_spec(format_spec, start, end,
1526 &format, '\0', '>'))
Victor Stinnerd3f08822012-05-29 12:57:52 +02001527 return -1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001528
1529 /* type conversion? */
1530 switch (format.type) {
1531 case '\0': /* No format code: like 'g', but with at least one decimal. */
1532 case 'e':
1533 case 'E':
1534 case 'f':
1535 case 'F':
1536 case 'g':
1537 case 'G':
1538 case 'n':
1539 /* no conversion, already a complex. do the formatting */
Victor Stinnerd3f08822012-05-29 12:57:52 +02001540 return format_complex_internal(obj, &format, writer);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001541
1542 default:
1543 /* unknown */
1544 unknown_presentation_type(format.type, obj->ob_type->tp_name);
Victor Stinnerd3f08822012-05-29 12:57:52 +02001545 return -1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001546 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001547}