blob: 0a3cc593d646b90261c7dd075c80d6dd19234921 [file] [log] [blame]
/* implements the unicode (as opposed to string) version of the
   built-in formatters for string, int, float.  that is, the versions
   of int.__format__, etc., that take and return unicode objects */
4
5#include "Python.h"
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02006#include <locale.h>
7
8/* Raises an exception about an unknown presentation type for this
9 * type. */
10
11static void
12unknown_presentation_type(Py_UCS4 presentation_type,
13 const char* type_name)
14{
15 /* %c might be out-of-range, hence the two cases. */
16 if (presentation_type > 32 && presentation_type < 128)
17 PyErr_Format(PyExc_ValueError,
18 "Unknown format code '%c' "
19 "for object of type '%.200s'",
20 (char)presentation_type,
21 type_name);
22 else
23 PyErr_Format(PyExc_ValueError,
24 "Unknown format code '\\x%x' "
25 "for object of type '%.200s'",
26 (unsigned int)presentation_type,
27 type_name);
28}
29
30static void
31invalid_comma_type(Py_UCS4 presentation_type)
32{
33 if (presentation_type > 32 && presentation_type < 128)
34 PyErr_Format(PyExc_ValueError,
35 "Cannot specify ',' with '%c'.",
36 (char)presentation_type);
37 else
38 PyErr_Format(PyExc_ValueError,
39 "Cannot specify ',' with '\\x%x'.",
40 (unsigned int)presentation_type);
41}
42
43/*
44 get_integer consumes 0 or more decimal digit characters from an
45 input string, updates *result with the corresponding positive
46 integer, and returns the number of digits consumed.
47
48 returns -1 on error.
49*/
50static int
51get_integer(PyObject *str, Py_ssize_t *pos, Py_ssize_t end,
52 Py_ssize_t *result)
53{
Mark Dickinson47862d42011-12-01 15:27:04 +000054 Py_ssize_t accumulator, digitval;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020055 int numdigits;
56 accumulator = numdigits = 0;
57 for (;;(*pos)++, numdigits++) {
58 if (*pos >= end)
59 break;
60 digitval = Py_UNICODE_TODECIMAL(PyUnicode_READ_CHAR(str, *pos));
61 if (digitval < 0)
62 break;
63 /*
Mark Dickinson47862d42011-12-01 15:27:04 +000064 Detect possible overflow before it happens:
65
66 accumulator * 10 + digitval > PY_SSIZE_T_MAX if and only if
67 accumulator > (PY_SSIZE_T_MAX - digitval) / 10.
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020068 */
Mark Dickinson47862d42011-12-01 15:27:04 +000069 if (accumulator > (PY_SSIZE_T_MAX - digitval) / 10) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020070 PyErr_Format(PyExc_ValueError,
71 "Too many decimal digits in format string");
72 return -1;
73 }
Mark Dickinson47862d42011-12-01 15:27:04 +000074 accumulator = accumulator * 10 + digitval;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020075 }
76 *result = accumulator;
77 return numdigits;
78}
79
80/************************************************************************/
81/*********** standard format specifier parsing **************************/
82/************************************************************************/
83
84/* returns true if this character is a specifier alignment token */
85Py_LOCAL_INLINE(int)
86is_alignment_token(Py_UCS4 c)
87{
88 switch (c) {
89 case '<': case '>': case '=': case '^':
90 return 1;
91 default:
92 return 0;
93 }
94}
95
96/* returns true if this character is a sign element */
97Py_LOCAL_INLINE(int)
98is_sign_element(Py_UCS4 c)
99{
100 switch (c) {
101 case ' ': case '+': case '-':
102 return 1;
103 default:
104 return 0;
105 }
106}
Eric Smith8c663262007-08-25 02:26:07 +0000107
Eric Smith4a7d76d2008-05-30 18:10:19 +0000108
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200109typedef struct {
110 Py_UCS4 fill_char;
111 Py_UCS4 align;
112 int alternate;
113 Py_UCS4 sign;
114 Py_ssize_t width;
115 int thousands_separators;
116 Py_ssize_t precision;
117 Py_UCS4 type;
118} InternalFormatSpec;
Eric Smith4a7d76d2008-05-30 18:10:19 +0000119
#if 0
/* Occasionally useful for debugging.  Should normally be compiled out.

   Dumps every field of *format to stdout, one field per line.  The
   Py_UCS4 fields are cast to int so that the argument types match the
   %d and %c conversions they are printed with. */
static void
DEBUG_PRINT_FORMAT_SPEC(InternalFormatSpec *format)
{
    printf("internal format spec: fill_char %d\n", (int)format->fill_char);
    printf("internal format spec: align %d\n", (int)format->align);
    printf("internal format spec: alternate %d\n", format->alternate);
    printf("internal format spec: sign %d\n", (int)format->sign);
    printf("internal format spec: width %zd\n", format->width);
    printf("internal format spec: thousands_separators %d\n",
           format->thousands_separators);
    printf("internal format spec: precision %zd\n", format->precision);
    printf("internal format spec: type %c\n", (int)format->type);
    printf("\n");
}
#endif
137
138
139/*
140 ptr points to the start of the format_spec, end points just past its end.
141 fills in format with the parsed information.
142 returns 1 on success, 0 on failure.
143 if failure, sets the exception
144*/
145static int
146parse_internal_render_format_spec(PyObject *format_spec,
147 Py_ssize_t start, Py_ssize_t end,
148 InternalFormatSpec *format,
149 char default_type,
150 char default_align)
151{
152 Py_ssize_t pos = start;
153 /* end-pos is used throughout this code to specify the length of
154 the input string */
155#define READ_spec(index) PyUnicode_READ_CHAR(format_spec, index)
156
157 Py_ssize_t consumed;
158 int align_specified = 0;
159
160 format->fill_char = '\0';
161 format->align = default_align;
162 format->alternate = 0;
163 format->sign = '\0';
164 format->width = -1;
165 format->thousands_separators = 0;
166 format->precision = -1;
167 format->type = default_type;
168
169 /* If the second char is an alignment token,
170 then parse the fill char */
171 if (end-pos >= 2 && is_alignment_token(READ_spec(pos+1))) {
172 format->align = READ_spec(pos+1);
173 format->fill_char = READ_spec(pos);
174 align_specified = 1;
175 pos += 2;
176 }
177 else if (end-pos >= 1 && is_alignment_token(READ_spec(pos))) {
178 format->align = READ_spec(pos);
179 align_specified = 1;
180 ++pos;
181 }
182
183 /* Parse the various sign options */
184 if (end-pos >= 1 && is_sign_element(READ_spec(pos))) {
185 format->sign = READ_spec(pos);
186 ++pos;
187 }
188
189 /* If the next character is #, we're in alternate mode. This only
190 applies to integers. */
191 if (end-pos >= 1 && READ_spec(pos) == '#') {
192 format->alternate = 1;
193 ++pos;
194 }
195
196 /* The special case for 0-padding (backwards compat) */
197 if (format->fill_char == '\0' && end-pos >= 1 && READ_spec(pos) == '0') {
198 format->fill_char = '0';
199 if (!align_specified) {
200 format->align = '=';
201 }
202 ++pos;
203 }
204
205 consumed = get_integer(format_spec, &pos, end, &format->width);
206 if (consumed == -1)
207 /* Overflow error. Exception already set. */
208 return 0;
209
210 /* If consumed is 0, we didn't consume any characters for the
211 width. In that case, reset the width to -1, because
212 get_integer() will have set it to zero. -1 is how we record
213 that the width wasn't specified. */
214 if (consumed == 0)
215 format->width = -1;
216
217 /* Comma signifies add thousands separators */
218 if (end-pos && READ_spec(pos) == ',') {
219 format->thousands_separators = 1;
220 ++pos;
221 }
222
223 /* Parse field precision */
224 if (end-pos && READ_spec(pos) == '.') {
225 ++pos;
226
227 consumed = get_integer(format_spec, &pos, end, &format->precision);
228 if (consumed == -1)
229 /* Overflow error. Exception already set. */
230 return 0;
231
232 /* Not having a precision after a dot is an error. */
233 if (consumed == 0) {
234 PyErr_Format(PyExc_ValueError,
235 "Format specifier missing precision");
236 return 0;
237 }
238
239 }
240
241 /* Finally, parse the type field. */
242
243 if (end-pos > 1) {
Eric V. Smithd25cfe62012-01-19 20:04:28 -0500244 /* More than one char remain, invalid format specifier. */
245 PyErr_Format(PyExc_ValueError, "Invalid format specifier");
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200246 return 0;
247 }
248
249 if (end-pos == 1) {
250 format->type = READ_spec(pos);
251 ++pos;
252 }
253
254 /* Do as much validating as we can, just by looking at the format
255 specifier. Do not take into account what type of formatting
256 we're doing (int, float, string). */
257
258 if (format->thousands_separators) {
259 switch (format->type) {
260 case 'd':
261 case 'e':
262 case 'f':
263 case 'g':
264 case 'E':
265 case 'G':
266 case '%':
267 case 'F':
268 case '\0':
269 /* These are allowed. See PEP 378.*/
270 break;
271 default:
272 invalid_comma_type(format->type);
273 return 0;
274 }
275 }
276
Victor Stinnera4ac6002012-01-21 15:50:49 +0100277 assert (format->align <= 127);
278 assert (format->sign <= 127);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200279 return 1;
280}
281
282/* Calculate the padding needed. */
283static void
284calc_padding(Py_ssize_t nchars, Py_ssize_t width, Py_UCS4 align,
285 Py_ssize_t *n_lpadding, Py_ssize_t *n_rpadding,
286 Py_ssize_t *n_total)
287{
288 if (width >= 0) {
289 if (nchars > width)
290 *n_total = nchars;
291 else
292 *n_total = width;
293 }
294 else {
295 /* not specified, use all of the chars and no more */
296 *n_total = nchars;
297 }
298
299 /* Figure out how much leading space we need, based on the
300 aligning */
301 if (align == '>')
302 *n_lpadding = *n_total - nchars;
303 else if (align == '^')
304 *n_lpadding = (*n_total - nchars) / 2;
305 else if (align == '<' || align == '=')
306 *n_lpadding = 0;
307 else {
308 /* We should never have an unspecified alignment. */
309 *n_lpadding = 0;
310 assert(0);
311 }
312
313 *n_rpadding = *n_total - nchars - *n_lpadding;
314}
315
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200316/* Do the padding, and return a pointer to where the caller-supplied
317 content goes. */
Victor Stinner9ce59bb2013-05-17 00:04:56 +0200318static int
Victor Stinnerd3f08822012-05-29 12:57:52 +0200319fill_padding(_PyUnicodeWriter *writer,
320 Py_ssize_t nchars,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200321 Py_UCS4 fill_char, Py_ssize_t n_lpadding,
322 Py_ssize_t n_rpadding)
323{
Victor Stinnerd3f08822012-05-29 12:57:52 +0200324 Py_ssize_t pos;
325
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200326 /* Pad on left. */
Victor Stinnerd3f08822012-05-29 12:57:52 +0200327 if (n_lpadding) {
328 pos = writer->pos;
329 _PyUnicode_FastFill(writer->buffer, pos, n_lpadding, fill_char);
330 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200331
332 /* Pad on right. */
Victor Stinnerd3f08822012-05-29 12:57:52 +0200333 if (n_rpadding) {
334 pos = writer->pos + nchars + n_lpadding;
335 _PyUnicode_FastFill(writer->buffer, pos, n_rpadding, fill_char);
336 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200337
338 /* Pointer to the user content. */
Victor Stinnerd3f08822012-05-29 12:57:52 +0200339 writer->pos += n_lpadding;
340 return 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200341}
342
343/************************************************************************/
344/*********** common routines for numeric formatting *********************/
345/************************************************************************/
346
/* Locale type codes, selecting where get_locale_info() takes the
   decimal point, thousands separator and grouping from. */
#define LT_CURRENT_LOCALE 0     /* whatever localeconv() reports */
#define LT_DEFAULT_LOCALE 1     /* hard-coded: '.', ',' and groups of 3 */
#define LT_NO_LOCALE 2          /* '.', no separator and no grouping */
351
352/* Locale info needed for formatting integers and the part of floats
353 before and including the decimal. Note that locales only support
354 8-bit chars, not unicode. */
355typedef struct {
Victor Stinner41a863c2012-02-24 00:37:51 +0100356 PyObject *decimal_point;
357 PyObject *thousands_sep;
358 const char *grouping;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200359} LocaleInfo;
360
Victor Stinner41a863c2012-02-24 00:37:51 +0100361#define STATIC_LOCALE_INFO_INIT {0, 0, 0}
362
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200363/* describes the layout for an integer, see the comment in
364 calc_number_widths() for details */
365typedef struct {
366 Py_ssize_t n_lpadding;
367 Py_ssize_t n_prefix;
368 Py_ssize_t n_spadding;
369 Py_ssize_t n_rpadding;
370 char sign;
371 Py_ssize_t n_sign; /* number of digits needed for sign (0/1) */
372 Py_ssize_t n_grouped_digits; /* Space taken up by the digits, including
373 any grouping chars. */
374 Py_ssize_t n_decimal; /* 0 if only an integer */
375 Py_ssize_t n_remainder; /* Digits in decimal and/or exponent part,
376 excluding the decimal itself, if
377 present. */
378
379 /* These 2 are not the widths of fields, but are needed by
380 STRINGLIB_GROUPING. */
381 Py_ssize_t n_digits; /* The number of digits before a decimal
382 or exponent. */
383 Py_ssize_t n_min_width; /* The min_width we used when we computed
384 the n_grouped_digits width. */
385} NumberFieldWidths;
386
387
388/* Given a number of the form:
389 digits[remainder]
390 where ptr points to the start and end points to the end, find where
391 the integer part ends. This could be a decimal, an exponent, both,
392 or neither.
393 If a decimal point is present, set *has_decimal and increment
394 remainder beyond it.
395 Results are undefined (but shouldn't crash) for improperly
396 formatted strings.
397*/
398static void
399parse_number(PyObject *s, Py_ssize_t pos, Py_ssize_t end,
400 Py_ssize_t *n_remainder, int *has_decimal)
401{
402 Py_ssize_t remainder;
403
Antoine Pitrouc73c5612013-02-09 23:14:42 +0100404 while (pos<end && Py_ISDIGIT(PyUnicode_READ_CHAR(s, pos)))
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200405 ++pos;
406 remainder = pos;
407
408 /* Does remainder start with a decimal point? */
409 *has_decimal = pos<end && PyUnicode_READ_CHAR(s, remainder) == '.';
410
411 /* Skip the decimal point. */
412 if (*has_decimal)
413 remainder++;
414
415 *n_remainder = end - remainder;
416}
417
418/* not all fields of format are used. for example, precision is
419 unused. should this take discrete params in order to be more clear
420 about what it does? or is passing a single format parameter easier
421 and more efficient enough to justify a little obfuscation? */
422static Py_ssize_t
423calc_number_widths(NumberFieldWidths *spec, Py_ssize_t n_prefix,
424 Py_UCS4 sign_char, PyObject *number, Py_ssize_t n_start,
425 Py_ssize_t n_end, Py_ssize_t n_remainder,
426 int has_decimal, const LocaleInfo *locale,
Victor Stinner41a863c2012-02-24 00:37:51 +0100427 const InternalFormatSpec *format, Py_UCS4 *maxchar)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200428{
429 Py_ssize_t n_non_digit_non_padding;
430 Py_ssize_t n_padding;
431
432 spec->n_digits = n_end - n_start - n_remainder - (has_decimal?1:0);
433 spec->n_lpadding = 0;
434 spec->n_prefix = n_prefix;
Victor Stinner41a863c2012-02-24 00:37:51 +0100435 spec->n_decimal = has_decimal ? PyUnicode_GET_LENGTH(locale->decimal_point) : 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200436 spec->n_remainder = n_remainder;
437 spec->n_spadding = 0;
438 spec->n_rpadding = 0;
439 spec->sign = '\0';
440 spec->n_sign = 0;
441
442 /* the output will look like:
443 | |
444 | <lpadding> <sign> <prefix> <spadding> <grouped_digits> <decimal> <remainder> <rpadding> |
445 | |
446
447 sign is computed from format->sign and the actual
448 sign of the number
449
450 prefix is given (it's for the '0x' prefix)
451
452 digits is already known
453
454 the total width is either given, or computed from the
455 actual digits
456
457 only one of lpadding, spadding, and rpadding can be non-zero,
458 and it's calculated from the width and other fields
459 */
460
461 /* compute the various parts we're going to write */
462 switch (format->sign) {
463 case '+':
464 /* always put a + or - */
465 spec->n_sign = 1;
466 spec->sign = (sign_char == '-' ? '-' : '+');
467 break;
468 case ' ':
469 spec->n_sign = 1;
470 spec->sign = (sign_char == '-' ? '-' : ' ');
471 break;
472 default:
473 /* Not specified, or the default (-) */
474 if (sign_char == '-') {
475 spec->n_sign = 1;
476 spec->sign = '-';
477 }
478 }
479
480 /* The number of chars used for non-digits and non-padding. */
481 n_non_digit_non_padding = spec->n_sign + spec->n_prefix + spec->n_decimal +
482 spec->n_remainder;
483
484 /* min_width can go negative, that's okay. format->width == -1 means
485 we don't care. */
486 if (format->fill_char == '0' && format->align == '=')
487 spec->n_min_width = format->width - n_non_digit_non_padding;
488 else
489 spec->n_min_width = 0;
490
491 if (spec->n_digits == 0)
492 /* This case only occurs when using 'c' formatting, we need
493 to special case it because the grouping code always wants
494 to have at least one character. */
495 spec->n_grouped_digits = 0;
Victor Stinner41a863c2012-02-24 00:37:51 +0100496 else {
497 Py_UCS4 grouping_maxchar;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200498 spec->n_grouped_digits = _PyUnicode_InsertThousandsGrouping(
Victor Stinner41a863c2012-02-24 00:37:51 +0100499 NULL, 0,
500 0, NULL,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200501 spec->n_digits, spec->n_min_width,
Victor Stinner41a863c2012-02-24 00:37:51 +0100502 locale->grouping, locale->thousands_sep, &grouping_maxchar);
503 *maxchar = Py_MAX(*maxchar, grouping_maxchar);
504 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200505
506 /* Given the desired width and the total of digit and non-digit
507 space we consume, see if we need any padding. format->width can
508 be negative (meaning no padding), but this code still works in
509 that case. */
510 n_padding = format->width -
511 (n_non_digit_non_padding + spec->n_grouped_digits);
512 if (n_padding > 0) {
513 /* Some padding is needed. Determine if it's left, space, or right. */
514 switch (format->align) {
515 case '<':
516 spec->n_rpadding = n_padding;
517 break;
518 case '^':
519 spec->n_lpadding = n_padding / 2;
520 spec->n_rpadding = n_padding - spec->n_lpadding;
521 break;
522 case '=':
523 spec->n_spadding = n_padding;
524 break;
525 case '>':
526 spec->n_lpadding = n_padding;
527 break;
528 default:
529 /* Shouldn't get here, but treat it as '>' */
530 spec->n_lpadding = n_padding;
531 assert(0);
532 break;
533 }
534 }
Victor Stinner41a863c2012-02-24 00:37:51 +0100535
536 if (spec->n_lpadding || spec->n_spadding || spec->n_rpadding)
537 *maxchar = Py_MAX(*maxchar, format->fill_char);
538
Victor Stinner90f50d42012-02-24 01:44:47 +0100539 if (spec->n_decimal)
540 *maxchar = Py_MAX(*maxchar, PyUnicode_MAX_CHAR_VALUE(locale->decimal_point));
541
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200542 return spec->n_lpadding + spec->n_sign + spec->n_prefix +
543 spec->n_spadding + spec->n_grouped_digits + spec->n_decimal +
544 spec->n_remainder + spec->n_rpadding;
545}
546
547/* Fill in the digit parts of a numbers's string representation,
548 as determined in calc_number_widths().
Victor Stinnerafbaa202011-09-28 21:50:16 +0200549 Return -1 on error, or 0 on success. */
550static int
Victor Stinnerd3f08822012-05-29 12:57:52 +0200551fill_number(_PyUnicodeWriter *writer, const NumberFieldWidths *spec,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200552 PyObject *digits, Py_ssize_t d_start, Py_ssize_t d_end,
Victor Stinnerafbaa202011-09-28 21:50:16 +0200553 PyObject *prefix, Py_ssize_t p_start,
554 Py_UCS4 fill_char,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200555 LocaleInfo *locale, int toupper)
556{
557 /* Used to keep track of digits, decimal, and remainder. */
558 Py_ssize_t d_pos = d_start;
Victor Stinner22c103b2013-05-07 23:50:03 +0200559 const unsigned int kind = writer->kind;
Victor Stinnerd3f08822012-05-29 12:57:52 +0200560 const void *data = writer->data;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200561 Py_ssize_t r;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200562
563 if (spec->n_lpadding) {
Victor Stinnerd3f08822012-05-29 12:57:52 +0200564 _PyUnicode_FastFill(writer->buffer,
565 writer->pos, spec->n_lpadding, fill_char);
566 writer->pos += spec->n_lpadding;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200567 }
568 if (spec->n_sign == 1) {
Victor Stinnerd3f08822012-05-29 12:57:52 +0200569 PyUnicode_WRITE(kind, data, writer->pos, spec->sign);
570 writer->pos++;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200571 }
572 if (spec->n_prefix) {
Victor Stinnerd3f08822012-05-29 12:57:52 +0200573 _PyUnicode_FastCopyCharacters(writer->buffer, writer->pos,
574 prefix, p_start,
575 spec->n_prefix);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200576 if (toupper) {
577 Py_ssize_t t;
Benjamin Peterson21e0da22012-01-11 21:00:42 -0500578 for (t = 0; t < spec->n_prefix; t++) {
Victor Stinnerd3f08822012-05-29 12:57:52 +0200579 Py_UCS4 c = PyUnicode_READ(kind, data, writer->pos + t);
Victor Stinnered277852012-02-01 00:22:23 +0100580 c = Py_TOUPPER(c);
Victor Stinnera4ac6002012-01-21 15:50:49 +0100581 assert (c <= 127);
Victor Stinnerd3f08822012-05-29 12:57:52 +0200582 PyUnicode_WRITE(kind, data, writer->pos + t, c);
Benjamin Peterson21e0da22012-01-11 21:00:42 -0500583 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200584 }
Victor Stinnerd3f08822012-05-29 12:57:52 +0200585 writer->pos += spec->n_prefix;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200586 }
587 if (spec->n_spadding) {
Victor Stinnerd3f08822012-05-29 12:57:52 +0200588 _PyUnicode_FastFill(writer->buffer,
589 writer->pos, spec->n_spadding, fill_char);
590 writer->pos += spec->n_spadding;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200591 }
592
593 /* Only for type 'c' special case, it has no digits. */
594 if (spec->n_digits != 0) {
595 /* Fill the digits with InsertThousandsGrouping. */
Victor Stinnerdba2dee2011-09-28 21:50:42 +0200596 char *pdigits;
597 if (PyUnicode_READY(digits))
598 return -1;
599 pdigits = PyUnicode_DATA(digits);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200600 if (PyUnicode_KIND(digits) < kind) {
601 pdigits = _PyUnicode_AsKind(digits, kind);
Victor Stinnerafbaa202011-09-28 21:50:16 +0200602 if (pdigits == NULL)
603 return -1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200604 }
Victor Stinner90f50d42012-02-24 01:44:47 +0100605 r = _PyUnicode_InsertThousandsGrouping(
Victor Stinnerd3f08822012-05-29 12:57:52 +0200606 writer->buffer, writer->pos,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200607 spec->n_grouped_digits,
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200608 pdigits + kind * d_pos,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200609 spec->n_digits, spec->n_min_width,
Victor Stinner41a863c2012-02-24 00:37:51 +0100610 locale->grouping, locale->thousands_sep, NULL);
Victor Stinner90f50d42012-02-24 01:44:47 +0100611 if (r == -1)
612 return -1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200613 assert(r == spec->n_grouped_digits);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200614 if (PyUnicode_KIND(digits) < kind)
615 PyMem_Free(pdigits);
616 d_pos += spec->n_digits;
617 }
618 if (toupper) {
619 Py_ssize_t t;
Benjamin Peterson21e0da22012-01-11 21:00:42 -0500620 for (t = 0; t < spec->n_grouped_digits; t++) {
Victor Stinnerd3f08822012-05-29 12:57:52 +0200621 Py_UCS4 c = PyUnicode_READ(kind, data, writer->pos + t);
Victor Stinnered277852012-02-01 00:22:23 +0100622 c = Py_TOUPPER(c);
Benjamin Peterson21e0da22012-01-11 21:00:42 -0500623 if (c > 127) {
624 PyErr_SetString(PyExc_SystemError, "non-ascii grouped digit");
625 return -1;
626 }
Victor Stinnerd3f08822012-05-29 12:57:52 +0200627 PyUnicode_WRITE(kind, data, writer->pos + t, c);
Benjamin Peterson21e0da22012-01-11 21:00:42 -0500628 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200629 }
Victor Stinnerd3f08822012-05-29 12:57:52 +0200630 writer->pos += spec->n_grouped_digits;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200631
632 if (spec->n_decimal) {
Victor Stinnerd3f08822012-05-29 12:57:52 +0200633 _PyUnicode_FastCopyCharacters(
634 writer->buffer, writer->pos,
635 locale->decimal_point, 0, spec->n_decimal);
636 writer->pos += spec->n_decimal;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200637 d_pos += 1;
638 }
639
640 if (spec->n_remainder) {
Victor Stinnerd3f08822012-05-29 12:57:52 +0200641 _PyUnicode_FastCopyCharacters(
642 writer->buffer, writer->pos,
643 digits, d_pos, spec->n_remainder);
644 writer->pos += spec->n_remainder;
Brett Cannon8a250fa2012-06-25 16:13:44 -0400645 /* d_pos += spec->n_remainder; */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200646 }
647
648 if (spec->n_rpadding) {
Victor Stinnerd3f08822012-05-29 12:57:52 +0200649 _PyUnicode_FastFill(writer->buffer,
650 writer->pos, spec->n_rpadding,
651 fill_char);
652 writer->pos += spec->n_rpadding;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200653 }
Victor Stinnerafbaa202011-09-28 21:50:16 +0200654 return 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200655}
656
657static char no_grouping[1] = {CHAR_MAX};
658
659/* Find the decimal point character(s?), thousands_separator(s?), and
660 grouping description, either for the current locale if type is
661 LT_CURRENT_LOCALE, a hard-coded locale if LT_DEFAULT_LOCALE, or
662 none if LT_NO_LOCALE. */
Victor Stinner41a863c2012-02-24 00:37:51 +0100663static int
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200664get_locale_info(int type, LocaleInfo *locale_info)
665{
666 switch (type) {
667 case LT_CURRENT_LOCALE: {
668 struct lconv *locale_data = localeconv();
Victor Stinner41a863c2012-02-24 00:37:51 +0100669 locale_info->decimal_point = PyUnicode_DecodeLocale(
670 locale_data->decimal_point,
671 NULL);
672 if (locale_info->decimal_point == NULL)
673 return -1;
674 locale_info->thousands_sep = PyUnicode_DecodeLocale(
675 locale_data->thousands_sep,
676 NULL);
677 if (locale_info->thousands_sep == NULL) {
678 Py_DECREF(locale_info->decimal_point);
679 return -1;
680 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200681 locale_info->grouping = locale_data->grouping;
682 break;
683 }
684 case LT_DEFAULT_LOCALE:
Victor Stinner41a863c2012-02-24 00:37:51 +0100685 locale_info->decimal_point = PyUnicode_FromOrdinal('.');
686 locale_info->thousands_sep = PyUnicode_FromOrdinal(',');
687 if (!locale_info->decimal_point || !locale_info->thousands_sep) {
688 Py_XDECREF(locale_info->decimal_point);
689 Py_XDECREF(locale_info->thousands_sep);
690 return -1;
691 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200692 locale_info->grouping = "\3"; /* Group every 3 characters. The
693 (implicit) trailing 0 means repeat
694 infinitely. */
695 break;
696 case LT_NO_LOCALE:
Victor Stinner41a863c2012-02-24 00:37:51 +0100697 locale_info->decimal_point = PyUnicode_FromOrdinal('.');
698 locale_info->thousands_sep = PyUnicode_New(0, 0);
699 if (!locale_info->decimal_point || !locale_info->thousands_sep) {
700 Py_XDECREF(locale_info->decimal_point);
701 Py_XDECREF(locale_info->thousands_sep);
702 return -1;
703 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200704 locale_info->grouping = no_grouping;
705 break;
706 default:
707 assert(0);
708 }
Victor Stinner41a863c2012-02-24 00:37:51 +0100709 return 0;
710}
711
712static void
713free_locale_info(LocaleInfo *locale_info)
714{
715 Py_XDECREF(locale_info->decimal_point);
716 Py_XDECREF(locale_info->thousands_sep);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200717}
718
719/************************************************************************/
720/*********** string formatting ******************************************/
721/************************************************************************/
722
Victor Stinnerd3f08822012-05-29 12:57:52 +0200723static int
724format_string_internal(PyObject *value, const InternalFormatSpec *format,
725 _PyUnicodeWriter *writer)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200726{
727 Py_ssize_t lpad;
728 Py_ssize_t rpad;
729 Py_ssize_t total;
Victor Stinnerd3f08822012-05-29 12:57:52 +0200730 Py_ssize_t len;
731 int result = -1;
Victor Stinnerece58de2012-04-23 23:36:38 +0200732 Py_UCS4 maxchar;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200733
Victor Stinnerd3f08822012-05-29 12:57:52 +0200734 assert(PyUnicode_IS_READY(value));
735 len = PyUnicode_GET_LENGTH(value);
736
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200737 /* sign is not allowed on strings */
738 if (format->sign != '\0') {
739 PyErr_SetString(PyExc_ValueError,
740 "Sign not allowed in string format specifier");
741 goto done;
742 }
743
744 /* alternate is not allowed on strings */
745 if (format->alternate) {
746 PyErr_SetString(PyExc_ValueError,
747 "Alternate form (#) not allowed in string format "
748 "specifier");
749 goto done;
750 }
751
752 /* '=' alignment not allowed on strings */
753 if (format->align == '=') {
754 PyErr_SetString(PyExc_ValueError,
755 "'=' alignment not allowed "
756 "in string format specifier");
757 goto done;
758 }
759
Victor Stinner621ef3d2012-10-02 00:33:47 +0200760 if ((format->width == -1 || format->width <= len)
761 && (format->precision == -1 || format->precision >= len)) {
Victor Stinnerd3f08822012-05-29 12:57:52 +0200762 /* Fast path */
763 return _PyUnicodeWriter_WriteStr(writer, value);
764 }
765
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200766 /* if precision is specified, output no more that format.precision
767 characters */
768 if (format->precision >= 0 && len >= format->precision) {
769 len = format->precision;
770 }
771
772 calc_padding(len, format->width, format->align, &lpad, &rpad, &total);
773
Victor Stinnereb4b5ac2013-04-03 02:02:33 +0200774 maxchar = writer->maxchar;
Victor Stinnera4ac6002012-01-21 15:50:49 +0100775 if (lpad != 0 || rpad != 0)
776 maxchar = Py_MAX(maxchar, format->fill_char);
Victor Stinnereb4b5ac2013-04-03 02:02:33 +0200777 if (PyUnicode_MAX_CHAR_VALUE(value) > maxchar) {
778 Py_UCS4 valmaxchar = _PyUnicode_FindMaxChar(value, 0, len);
779 maxchar = Py_MAX(maxchar, valmaxchar);
780 }
Victor Stinnera4ac6002012-01-21 15:50:49 +0100781
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200782 /* allocate the resulting string */
Victor Stinnerd3f08822012-05-29 12:57:52 +0200783 if (_PyUnicodeWriter_Prepare(writer, total, maxchar) == -1)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200784 goto done;
785
786 /* Write into that space. First the padding. */
Victor Stinnerd3f08822012-05-29 12:57:52 +0200787 result = fill_padding(writer, len,
788 format->fill_char=='\0'?' ':format->fill_char,
789 lpad, rpad);
790 if (result == -1)
791 goto done;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200792
793 /* Then the source string. */
Victor Stinnerc9d369f2012-06-16 02:22:37 +0200794 if (len) {
795 _PyUnicode_FastCopyCharacters(writer->buffer, writer->pos,
796 value, 0, len);
797 }
Victor Stinnerd3f08822012-05-29 12:57:52 +0200798 writer->pos += (len + rpad);
799 result = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200800
801done:
802 return result;
803}
804
805
806/************************************************************************/
807/*********** long formatting ********************************************/
808/************************************************************************/
809
Victor Stinnerd3f08822012-05-29 12:57:52 +0200810static int
811format_long_internal(PyObject *value, const InternalFormatSpec *format,
812 _PyUnicodeWriter *writer)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200813{
Victor Stinnerd3f08822012-05-29 12:57:52 +0200814 int result = -1;
Amaury Forgeot d'Arccd27df32012-01-23 22:42:19 +0100815 Py_UCS4 maxchar = 127;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200816 PyObject *tmp = NULL;
817 Py_ssize_t inumeric_chars;
818 Py_UCS4 sign_char = '\0';
819 Py_ssize_t n_digits; /* count of digits need from the computed
820 string */
821 Py_ssize_t n_remainder = 0; /* Used only for 'c' formatting, which
822 produces non-digits */
823 Py_ssize_t n_prefix = 0; /* Count of prefix chars, (e.g., '0x') */
824 Py_ssize_t n_total;
Victor Stinnered277852012-02-01 00:22:23 +0100825 Py_ssize_t prefix = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200826 NumberFieldWidths spec;
827 long x;
828
829 /* Locale settings, either from the actual locale or
830 from a hard-code pseudo-locale */
Victor Stinner41a863c2012-02-24 00:37:51 +0100831 LocaleInfo locale = STATIC_LOCALE_INFO_INIT;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200832
833 /* no precision allowed on integers */
834 if (format->precision != -1) {
835 PyErr_SetString(PyExc_ValueError,
836 "Precision not allowed in integer format specifier");
837 goto done;
838 }
839
840 /* special case for character formatting */
841 if (format->type == 'c') {
842 /* error to specify a sign */
843 if (format->sign != '\0') {
844 PyErr_SetString(PyExc_ValueError,
845 "Sign not allowed with integer"
846 " format specifier 'c'");
847 goto done;
848 }
849
850 /* taken from unicodeobject.c formatchar() */
851 /* Integer input truncated to a character */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200852 x = PyLong_AsLong(value);
853 if (x == -1 && PyErr_Occurred())
854 goto done;
855 if (x < 0 || x > 0x10ffff) {
856 PyErr_SetString(PyExc_OverflowError,
Victor Stinnera4ac6002012-01-21 15:50:49 +0100857 "%c arg not in range(0x110000)");
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200858 goto done;
859 }
860 tmp = PyUnicode_FromOrdinal(x);
861 inumeric_chars = 0;
862 n_digits = 1;
Amaury Forgeot d'Arc6d766fc2012-01-23 23:20:43 +0100863 maxchar = Py_MAX(maxchar, (Py_UCS4)x);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200864
865 /* As a sort-of hack, we tell calc_number_widths that we only
866 have "remainder" characters. calc_number_widths thinks
867 these are characters that don't get formatted, only copied
868 into the output string. We do this for 'c' formatting,
869 because the characters are likely to be non-digits. */
870 n_remainder = 1;
871 }
872 else {
873 int base;
874 int leading_chars_to_skip = 0; /* Number of characters added by
875 PyNumber_ToBase that we want to
876 skip over. */
877
878 /* Compute the base and how many characters will be added by
879 PyNumber_ToBase */
880 switch (format->type) {
881 case 'b':
882 base = 2;
883 leading_chars_to_skip = 2; /* 0b */
884 break;
885 case 'o':
886 base = 8;
887 leading_chars_to_skip = 2; /* 0o */
888 break;
889 case 'x':
890 case 'X':
891 base = 16;
892 leading_chars_to_skip = 2; /* 0x */
893 break;
894 default: /* shouldn't be needed, but stops a compiler warning */
895 case 'd':
896 case 'n':
897 base = 10;
898 break;
899 }
900
Victor Stinnerd3f08822012-05-29 12:57:52 +0200901 if (format->sign != '+' && format->sign != ' '
902 && format->width == -1
903 && format->type != 'X' && format->type != 'n'
904 && !format->thousands_separators
905 && PyLong_CheckExact(value))
906 {
907 /* Fast path */
908 return _PyLong_FormatWriter(writer, value, base, format->alternate);
909 }
910
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200911 /* The number of prefix chars is the same as the leading
912 chars to skip */
913 if (format->alternate)
914 n_prefix = leading_chars_to_skip;
915
916 /* Do the hard part, converting to a string in a given base */
Victor Stinnerd3f08822012-05-29 12:57:52 +0200917 tmp = _PyLong_Format(value, base);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200918 if (tmp == NULL || PyUnicode_READY(tmp) == -1)
919 goto done;
920
921 inumeric_chars = 0;
922 n_digits = PyUnicode_GET_LENGTH(tmp);
923
924 prefix = inumeric_chars;
925
926 /* Is a sign character present in the output? If so, remember it
927 and skip it */
928 if (PyUnicode_READ_CHAR(tmp, inumeric_chars) == '-') {
929 sign_char = '-';
930 ++prefix;
931 ++leading_chars_to_skip;
932 }
933
934 /* Skip over the leading chars (0x, 0b, etc.) */
935 n_digits -= leading_chars_to_skip;
936 inumeric_chars += leading_chars_to_skip;
937 }
938
939 /* Determine the grouping, separator, and decimal point, if any. */
Victor Stinner41a863c2012-02-24 00:37:51 +0100940 if (get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
941 (format->thousands_separators ?
942 LT_DEFAULT_LOCALE :
943 LT_NO_LOCALE),
944 &locale) == -1)
945 goto done;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200946
947 /* Calculate how much memory we'll need. */
948 n_total = calc_number_widths(&spec, n_prefix, sign_char, tmp, inumeric_chars,
Victor Stinner41a863c2012-02-24 00:37:51 +0100949 inumeric_chars + n_digits, n_remainder, 0,
950 &locale, format, &maxchar);
Victor Stinnera4ac6002012-01-21 15:50:49 +0100951
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200952 /* Allocate the memory. */
Victor Stinnerd3f08822012-05-29 12:57:52 +0200953 if (_PyUnicodeWriter_Prepare(writer, n_total, maxchar) == -1)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200954 goto done;
955
956 /* Populate the memory. */
Victor Stinnerd3f08822012-05-29 12:57:52 +0200957 result = fill_number(writer, &spec,
958 tmp, inumeric_chars, inumeric_chars + n_digits,
959 tmp, prefix,
960 format->fill_char == '\0' ? ' ' : format->fill_char,
961 &locale, format->type == 'X');
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200962
963done:
964 Py_XDECREF(tmp);
Victor Stinner41a863c2012-02-24 00:37:51 +0100965 free_locale_info(&locale);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200966 return result;
967}
968
969/************************************************************************/
970/*********** float formatting *******************************************/
971/************************************************************************/
972
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200973/* much of this is taken from unicodeobject.c */
Victor Stinnerd3f08822012-05-29 12:57:52 +0200974static int
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200975format_float_internal(PyObject *value,
Victor Stinnerd3f08822012-05-29 12:57:52 +0200976 const InternalFormatSpec *format,
977 _PyUnicodeWriter *writer)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200978{
979 char *buf = NULL; /* buffer returned from PyOS_double_to_string */
980 Py_ssize_t n_digits;
981 Py_ssize_t n_remainder;
982 Py_ssize_t n_total;
983 int has_decimal;
984 double val;
Victor Stinner76d38502013-06-24 23:34:15 +0200985 int precision, default_precision = 6;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200986 Py_UCS4 type = format->type;
987 int add_pct = 0;
988 Py_ssize_t index;
989 NumberFieldWidths spec;
990 int flags = 0;
Victor Stinnerd3f08822012-05-29 12:57:52 +0200991 int result = -1;
Amaury Forgeot d'Arccd27df32012-01-23 22:42:19 +0100992 Py_UCS4 maxchar = 127;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200993 Py_UCS4 sign_char = '\0';
994 int float_type; /* Used to see if we have a nan, inf, or regular float. */
995 PyObject *unicode_tmp = NULL;
996
997 /* Locale settings, either from the actual locale or
998 from a hard-code pseudo-locale */
Victor Stinner41a863c2012-02-24 00:37:51 +0100999 LocaleInfo locale = STATIC_LOCALE_INFO_INIT;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001000
Victor Stinner2f084ec2013-06-23 14:54:30 +02001001 if (format->precision > INT_MAX) {
1002 PyErr_SetString(PyExc_ValueError, "precision too big");
1003 goto done;
1004 }
1005 precision = (int)format->precision;
1006
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001007 if (format->alternate)
1008 flags |= Py_DTSF_ALT;
1009
1010 if (type == '\0') {
1011 /* Omitted type specifier. Behaves in the same way as repr(x)
1012 and str(x) if no precision is given, else like 'g', but with
1013 at least one digit after the decimal point. */
1014 flags |= Py_DTSF_ADD_DOT_0;
1015 type = 'r';
1016 default_precision = 0;
1017 }
1018
1019 if (type == 'n')
1020 /* 'n' is the same as 'g', except for the locale used to
1021 format the result. We take care of that later. */
1022 type = 'g';
1023
1024 val = PyFloat_AsDouble(value);
1025 if (val == -1.0 && PyErr_Occurred())
1026 goto done;
1027
1028 if (type == '%') {
1029 type = 'f';
1030 val *= 100;
1031 add_pct = 1;
1032 }
1033
1034 if (precision < 0)
1035 precision = default_precision;
1036 else if (type == 'r')
1037 type = 'g';
1038
1039 /* Cast "type", because if we're in unicode we need to pass a
1040 8-bit char. This is safe, because we've restricted what "type"
1041 can be. */
1042 buf = PyOS_double_to_string(val, (char)type, precision, flags,
1043 &float_type);
1044 if (buf == NULL)
1045 goto done;
1046 n_digits = strlen(buf);
1047
1048 if (add_pct) {
1049 /* We know that buf has a trailing zero (since we just called
1050 strlen() on it), and we don't use that fact any more. So we
1051 can just write over the trailing zero. */
1052 buf[n_digits] = '%';
1053 n_digits += 1;
1054 }
1055
Victor Stinnerd3f08822012-05-29 12:57:52 +02001056 if (format->sign != '+' && format->sign != ' '
1057 && format->width == -1
1058 && format->type != 'n'
1059 && !format->thousands_separators)
1060 {
1061 /* Fast path */
Victor Stinner4a587072013-11-19 12:54:53 +01001062 result = _PyUnicodeWriter_WriteASCIIString(writer, buf, n_digits);
1063 PyMem_Free(buf);
Victor Stinnerd3f08822012-05-29 12:57:52 +02001064 return result;
1065 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001066
Victor Stinner4a587072013-11-19 12:54:53 +01001067 /* Since there is no unicode version of PyOS_double_to_string,
1068 just use the 8 bit version and then convert to unicode. */
1069 unicode_tmp = _PyUnicode_FromASCII(buf, n_digits);
1070 PyMem_Free(buf);
1071 if (unicode_tmp == NULL)
1072 goto done;
1073
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001074 /* Is a sign character present in the output? If so, remember it
1075 and skip it */
Victor Stinnerd3f08822012-05-29 12:57:52 +02001076 index = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001077 if (PyUnicode_READ_CHAR(unicode_tmp, index) == '-') {
1078 sign_char = '-';
1079 ++index;
1080 --n_digits;
1081 }
1082
1083 /* Determine if we have any "remainder" (after the digits, might include
1084 decimal or exponent or both (or neither)) */
1085 parse_number(unicode_tmp, index, index + n_digits, &n_remainder, &has_decimal);
1086
1087 /* Determine the grouping, separator, and decimal point, if any. */
Victor Stinner41a863c2012-02-24 00:37:51 +01001088 if (get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
1089 (format->thousands_separators ?
1090 LT_DEFAULT_LOCALE :
1091 LT_NO_LOCALE),
1092 &locale) == -1)
1093 goto done;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001094
1095 /* Calculate how much memory we'll need. */
Victor Stinnerafbaa202011-09-28 21:50:16 +02001096 n_total = calc_number_widths(&spec, 0, sign_char, unicode_tmp, index,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001097 index + n_digits, n_remainder, has_decimal,
Victor Stinner41a863c2012-02-24 00:37:51 +01001098 &locale, format, &maxchar);
Victor Stinnera4ac6002012-01-21 15:50:49 +01001099
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001100 /* Allocate the memory. */
Victor Stinnerd3f08822012-05-29 12:57:52 +02001101 if (_PyUnicodeWriter_Prepare(writer, n_total, maxchar) == -1)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001102 goto done;
1103
1104 /* Populate the memory. */
Victor Stinnerd3f08822012-05-29 12:57:52 +02001105 result = fill_number(writer, &spec,
1106 unicode_tmp, index, index + n_digits,
1107 NULL, 0,
1108 format->fill_char == '\0' ? ' ' : format->fill_char,
1109 &locale, 0);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001110
1111done:
Stefan Krahd9c1bf72012-09-06 13:02:46 +02001112 Py_XDECREF(unicode_tmp);
Victor Stinner41a863c2012-02-24 00:37:51 +01001113 free_locale_info(&locale);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001114 return result;
1115}
1116
1117/************************************************************************/
1118/*********** complex formatting *****************************************/
1119/************************************************************************/
1120
Victor Stinnerd3f08822012-05-29 12:57:52 +02001121static int
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001122format_complex_internal(PyObject *value,
Victor Stinnerd3f08822012-05-29 12:57:52 +02001123 const InternalFormatSpec *format,
1124 _PyUnicodeWriter *writer)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001125{
1126 double re;
1127 double im;
1128 char *re_buf = NULL; /* buffer returned from PyOS_double_to_string */
1129 char *im_buf = NULL; /* buffer returned from PyOS_double_to_string */
1130
1131 InternalFormatSpec tmp_format = *format;
1132 Py_ssize_t n_re_digits;
1133 Py_ssize_t n_im_digits;
1134 Py_ssize_t n_re_remainder;
1135 Py_ssize_t n_im_remainder;
1136 Py_ssize_t n_re_total;
1137 Py_ssize_t n_im_total;
1138 int re_has_decimal;
1139 int im_has_decimal;
Victor Stinner76d38502013-06-24 23:34:15 +02001140 int precision, default_precision = 6;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001141 Py_UCS4 type = format->type;
1142 Py_ssize_t i_re;
1143 Py_ssize_t i_im;
1144 NumberFieldWidths re_spec;
1145 NumberFieldWidths im_spec;
1146 int flags = 0;
Victor Stinnerd3f08822012-05-29 12:57:52 +02001147 int result = -1;
Amaury Forgeot d'Arccd27df32012-01-23 22:42:19 +01001148 Py_UCS4 maxchar = 127;
Victor Stinnerd3f08822012-05-29 12:57:52 +02001149 enum PyUnicode_Kind rkind;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001150 void *rdata;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001151 Py_UCS4 re_sign_char = '\0';
1152 Py_UCS4 im_sign_char = '\0';
1153 int re_float_type; /* Used to see if we have a nan, inf, or regular float. */
1154 int im_float_type;
1155 int add_parens = 0;
1156 int skip_re = 0;
1157 Py_ssize_t lpad;
1158 Py_ssize_t rpad;
1159 Py_ssize_t total;
1160 PyObject *re_unicode_tmp = NULL;
1161 PyObject *im_unicode_tmp = NULL;
1162
1163 /* Locale settings, either from the actual locale or
1164 from a hard-code pseudo-locale */
Victor Stinner41a863c2012-02-24 00:37:51 +01001165 LocaleInfo locale = STATIC_LOCALE_INFO_INIT;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001166
Victor Stinner2f084ec2013-06-23 14:54:30 +02001167 if (format->precision > INT_MAX) {
1168 PyErr_SetString(PyExc_ValueError, "precision too big");
1169 goto done;
1170 }
1171 precision = (int)format->precision;
1172
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001173 /* Zero padding is not allowed. */
1174 if (format->fill_char == '0') {
1175 PyErr_SetString(PyExc_ValueError,
1176 "Zero padding is not allowed in complex format "
1177 "specifier");
1178 goto done;
1179 }
1180
1181 /* Neither is '=' alignment . */
1182 if (format->align == '=') {
1183 PyErr_SetString(PyExc_ValueError,
1184 "'=' alignment flag is not allowed in complex format "
1185 "specifier");
1186 goto done;
1187 }
1188
1189 re = PyComplex_RealAsDouble(value);
1190 if (re == -1.0 && PyErr_Occurred())
1191 goto done;
1192 im = PyComplex_ImagAsDouble(value);
1193 if (im == -1.0 && PyErr_Occurred())
1194 goto done;
1195
1196 if (format->alternate)
1197 flags |= Py_DTSF_ALT;
1198
1199 if (type == '\0') {
1200 /* Omitted type specifier. Should be like str(self). */
1201 type = 'r';
1202 default_precision = 0;
1203 if (re == 0.0 && copysign(1.0, re) == 1.0)
1204 skip_re = 1;
1205 else
1206 add_parens = 1;
1207 }
1208
1209 if (type == 'n')
1210 /* 'n' is the same as 'g', except for the locale used to
1211 format the result. We take care of that later. */
1212 type = 'g';
1213
1214 if (precision < 0)
1215 precision = default_precision;
1216 else if (type == 'r')
1217 type = 'g';
1218
1219 /* Cast "type", because if we're in unicode we need to pass a
1220 8-bit char. This is safe, because we've restricted what "type"
1221 can be. */
1222 re_buf = PyOS_double_to_string(re, (char)type, precision, flags,
1223 &re_float_type);
1224 if (re_buf == NULL)
1225 goto done;
1226 im_buf = PyOS_double_to_string(im, (char)type, precision, flags,
1227 &im_float_type);
1228 if (im_buf == NULL)
1229 goto done;
1230
1231 n_re_digits = strlen(re_buf);
1232 n_im_digits = strlen(im_buf);
1233
1234 /* Since there is no unicode version of PyOS_double_to_string,
1235 just use the 8 bit version and then convert to unicode. */
Victor Stinnerd3f08822012-05-29 12:57:52 +02001236 re_unicode_tmp = _PyUnicode_FromASCII(re_buf, n_re_digits);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001237 if (re_unicode_tmp == NULL)
1238 goto done;
1239 i_re = 0;
1240
Victor Stinnerd3f08822012-05-29 12:57:52 +02001241 im_unicode_tmp = _PyUnicode_FromASCII(im_buf, n_im_digits);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001242 if (im_unicode_tmp == NULL)
1243 goto done;
1244 i_im = 0;
1245
1246 /* Is a sign character present in the output? If so, remember it
1247 and skip it */
1248 if (PyUnicode_READ_CHAR(re_unicode_tmp, i_re) == '-') {
1249 re_sign_char = '-';
1250 ++i_re;
1251 --n_re_digits;
1252 }
1253 if (PyUnicode_READ_CHAR(im_unicode_tmp, i_im) == '-') {
1254 im_sign_char = '-';
1255 ++i_im;
1256 --n_im_digits;
1257 }
1258
1259 /* Determine if we have any "remainder" (after the digits, might include
1260 decimal or exponent or both (or neither)) */
Victor Stinnerafbaa202011-09-28 21:50:16 +02001261 parse_number(re_unicode_tmp, i_re, i_re + n_re_digits,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001262 &n_re_remainder, &re_has_decimal);
Victor Stinnerafbaa202011-09-28 21:50:16 +02001263 parse_number(im_unicode_tmp, i_im, i_im + n_im_digits,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001264 &n_im_remainder, &im_has_decimal);
1265
1266 /* Determine the grouping, separator, and decimal point, if any. */
Victor Stinner41a863c2012-02-24 00:37:51 +01001267 if (get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
1268 (format->thousands_separators ?
1269 LT_DEFAULT_LOCALE :
1270 LT_NO_LOCALE),
1271 &locale) == -1)
1272 goto done;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001273
1274 /* Turn off any padding. We'll do it later after we've composed
1275 the numbers without padding. */
1276 tmp_format.fill_char = '\0';
1277 tmp_format.align = '<';
1278 tmp_format.width = -1;
1279
1280 /* Calculate how much memory we'll need. */
1281 n_re_total = calc_number_widths(&re_spec, 0, re_sign_char, re_unicode_tmp,
1282 i_re, i_re + n_re_digits, n_re_remainder,
Victor Stinner41a863c2012-02-24 00:37:51 +01001283 re_has_decimal, &locale, &tmp_format,
1284 &maxchar);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001285
1286 /* Same formatting, but always include a sign, unless the real part is
1287 * going to be omitted, in which case we use whatever sign convention was
1288 * requested by the original format. */
1289 if (!skip_re)
1290 tmp_format.sign = '+';
1291 n_im_total = calc_number_widths(&im_spec, 0, im_sign_char, im_unicode_tmp,
1292 i_im, i_im + n_im_digits, n_im_remainder,
Victor Stinner41a863c2012-02-24 00:37:51 +01001293 im_has_decimal, &locale, &tmp_format,
1294 &maxchar);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001295
1296 if (skip_re)
1297 n_re_total = 0;
1298
1299 /* Add 1 for the 'j', and optionally 2 for parens. */
1300 calc_padding(n_re_total + n_im_total + 1 + add_parens * 2,
1301 format->width, format->align, &lpad, &rpad, &total);
1302
Victor Stinner41a863c2012-02-24 00:37:51 +01001303 if (lpad || rpad)
Victor Stinnera4ac6002012-01-21 15:50:49 +01001304 maxchar = Py_MAX(maxchar, format->fill_char);
1305
Victor Stinnerd3f08822012-05-29 12:57:52 +02001306 if (_PyUnicodeWriter_Prepare(writer, total, maxchar) == -1)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001307 goto done;
Victor Stinnerd3f08822012-05-29 12:57:52 +02001308 rkind = writer->kind;
1309 rdata = writer->data;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001310
1311 /* Populate the memory. First, the padding. */
Victor Stinnerd3f08822012-05-29 12:57:52 +02001312 result = fill_padding(writer,
1313 n_re_total + n_im_total + 1 + add_parens * 2,
1314 format->fill_char=='\0' ? ' ' : format->fill_char,
1315 lpad, rpad);
1316 if (result == -1)
1317 goto done;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001318
Victor Stinnerd3f08822012-05-29 12:57:52 +02001319 if (add_parens) {
1320 PyUnicode_WRITE(rkind, rdata, writer->pos, '(');
1321 writer->pos++;
1322 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001323
1324 if (!skip_re) {
Victor Stinnerd3f08822012-05-29 12:57:52 +02001325 result = fill_number(writer, &re_spec,
1326 re_unicode_tmp, i_re, i_re + n_re_digits,
1327 NULL, 0,
1328 0,
1329 &locale, 0);
1330 if (result == -1)
Victor Stinnerafbaa202011-09-28 21:50:16 +02001331 goto done;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001332 }
Victor Stinnerd3f08822012-05-29 12:57:52 +02001333 result = fill_number(writer, &im_spec,
1334 im_unicode_tmp, i_im, i_im + n_im_digits,
1335 NULL, 0,
1336 0,
1337 &locale, 0);
1338 if (result == -1)
Victor Stinnerafbaa202011-09-28 21:50:16 +02001339 goto done;
Victor Stinnerd3f08822012-05-29 12:57:52 +02001340 PyUnicode_WRITE(rkind, rdata, writer->pos, 'j');
1341 writer->pos++;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001342
Victor Stinnerd3f08822012-05-29 12:57:52 +02001343 if (add_parens) {
1344 PyUnicode_WRITE(rkind, rdata, writer->pos, ')');
1345 writer->pos++;
1346 }
1347
1348 writer->pos += rpad;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001349
1350done:
1351 PyMem_Free(re_buf);
1352 PyMem_Free(im_buf);
1353 Py_XDECREF(re_unicode_tmp);
1354 Py_XDECREF(im_unicode_tmp);
Victor Stinner41a863c2012-02-24 00:37:51 +01001355 free_locale_info(&locale);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001356 return result;
1357}
1358
1359/************************************************************************/
1360/*********** built in formatters ****************************************/
1361/************************************************************************/
doko@ubuntu.com39378f72012-06-21 12:12:20 +02001362static int
Victor Stinnerd3f08822012-05-29 12:57:52 +02001363format_obj(PyObject *obj, _PyUnicodeWriter *writer)
1364{
1365 PyObject *str;
1366 int err;
1367
1368 str = PyObject_Str(obj);
1369 if (str == NULL)
1370 return -1;
1371 err = _PyUnicodeWriter_WriteStr(writer, str);
1372 Py_DECREF(str);
1373 return err;
1374}
1375
1376int
1377_PyUnicode_FormatAdvancedWriter(_PyUnicodeWriter *writer,
1378 PyObject *obj,
1379 PyObject *format_spec,
1380 Py_ssize_t start, Py_ssize_t end)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001381{
1382 InternalFormatSpec format;
Victor Stinnerd3f08822012-05-29 12:57:52 +02001383
1384 assert(PyUnicode_Check(obj));
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001385
1386 /* check for the special case of zero length format spec, make
1387 it equivalent to str(obj) */
Victor Stinnerd3f08822012-05-29 12:57:52 +02001388 if (start == end) {
1389 if (PyUnicode_CheckExact(obj))
1390 return _PyUnicodeWriter_WriteStr(writer, obj);
1391 else
1392 return format_obj(obj, writer);
1393 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001394
1395 /* parse the format_spec */
1396 if (!parse_internal_render_format_spec(format_spec, start, end,
1397 &format, 's', '<'))
Victor Stinnerd3f08822012-05-29 12:57:52 +02001398 return -1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001399
1400 /* type conversion? */
1401 switch (format.type) {
1402 case 's':
1403 /* no type conversion needed, already a string. do the formatting */
Victor Stinnerd3f08822012-05-29 12:57:52 +02001404 return format_string_internal(obj, &format, writer);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001405 default:
1406 /* unknown */
1407 unknown_presentation_type(format.type, obj->ob_type->tp_name);
Victor Stinnerd3f08822012-05-29 12:57:52 +02001408 return -1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001409 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001410}
1411
Victor Stinnerd3f08822012-05-29 12:57:52 +02001412int
1413_PyLong_FormatAdvancedWriter(_PyUnicodeWriter *writer,
1414 PyObject *obj,
1415 PyObject *format_spec,
1416 Py_ssize_t start, Py_ssize_t end)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001417{
Victor Stinnerd3f08822012-05-29 12:57:52 +02001418 PyObject *tmp = NULL, *str = NULL;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001419 InternalFormatSpec format;
Victor Stinnerd3f08822012-05-29 12:57:52 +02001420 int result = -1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001421
1422 /* check for the special case of zero length format spec, make
1423 it equivalent to str(obj) */
1424 if (start == end) {
Victor Stinnerd3f08822012-05-29 12:57:52 +02001425 if (PyLong_CheckExact(obj))
1426 return _PyLong_FormatWriter(writer, obj, 10, 0);
1427 else
1428 return format_obj(obj, writer);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001429 }
1430
1431 /* parse the format_spec */
1432 if (!parse_internal_render_format_spec(format_spec, start, end,
1433 &format, 'd', '>'))
1434 goto done;
1435
1436 /* type conversion? */
1437 switch (format.type) {
1438 case 'b':
1439 case 'c':
1440 case 'd':
1441 case 'o':
1442 case 'x':
1443 case 'X':
1444 case 'n':
Serhiy Storchaka95949422013-08-27 19:40:23 +03001445 /* no type conversion needed, already an int. do the formatting */
Victor Stinnerd3f08822012-05-29 12:57:52 +02001446 result = format_long_internal(obj, &format, writer);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001447 break;
1448
1449 case 'e':
1450 case 'E':
1451 case 'f':
1452 case 'F':
1453 case 'g':
1454 case 'G':
1455 case '%':
1456 /* convert to float */
1457 tmp = PyNumber_Float(obj);
1458 if (tmp == NULL)
1459 goto done;
Victor Stinnerd3f08822012-05-29 12:57:52 +02001460 result = format_float_internal(tmp, &format, writer);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001461 break;
1462
1463 default:
1464 /* unknown */
1465 unknown_presentation_type(format.type, obj->ob_type->tp_name);
1466 goto done;
1467 }
1468
1469done:
1470 Py_XDECREF(tmp);
Victor Stinnerd3f08822012-05-29 12:57:52 +02001471 Py_XDECREF(str);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001472 return result;
1473}
1474
Victor Stinnerd3f08822012-05-29 12:57:52 +02001475int
1476_PyFloat_FormatAdvancedWriter(_PyUnicodeWriter *writer,
1477 PyObject *obj,
1478 PyObject *format_spec,
1479 Py_ssize_t start, Py_ssize_t end)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001480{
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001481 InternalFormatSpec format;
1482
1483 /* check for the special case of zero length format spec, make
1484 it equivalent to str(obj) */
Victor Stinnerd3f08822012-05-29 12:57:52 +02001485 if (start == end)
1486 return format_obj(obj, writer);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001487
1488 /* parse the format_spec */
1489 if (!parse_internal_render_format_spec(format_spec, start, end,
1490 &format, '\0', '>'))
Victor Stinnerd3f08822012-05-29 12:57:52 +02001491 return -1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001492
1493 /* type conversion? */
1494 switch (format.type) {
1495 case '\0': /* No format code: like 'g', but with at least one decimal. */
1496 case 'e':
1497 case 'E':
1498 case 'f':
1499 case 'F':
1500 case 'g':
1501 case 'G':
1502 case 'n':
1503 case '%':
1504 /* no conversion, already a float. do the formatting */
Victor Stinnerd3f08822012-05-29 12:57:52 +02001505 return format_float_internal(obj, &format, writer);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001506
1507 default:
1508 /* unknown */
1509 unknown_presentation_type(format.type, obj->ob_type->tp_name);
Victor Stinnerd3f08822012-05-29 12:57:52 +02001510 return -1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001511 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001512}
1513
Victor Stinnerd3f08822012-05-29 12:57:52 +02001514int
1515_PyComplex_FormatAdvancedWriter(_PyUnicodeWriter *writer,
1516 PyObject *obj,
1517 PyObject *format_spec,
1518 Py_ssize_t start, Py_ssize_t end)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001519{
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001520 InternalFormatSpec format;
1521
1522 /* check for the special case of zero length format spec, make
1523 it equivalent to str(obj) */
Victor Stinnerd3f08822012-05-29 12:57:52 +02001524 if (start == end)
1525 return format_obj(obj, writer);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001526
1527 /* parse the format_spec */
1528 if (!parse_internal_render_format_spec(format_spec, start, end,
1529 &format, '\0', '>'))
Victor Stinnerd3f08822012-05-29 12:57:52 +02001530 return -1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001531
1532 /* type conversion? */
1533 switch (format.type) {
1534 case '\0': /* No format code: like 'g', but with at least one decimal. */
1535 case 'e':
1536 case 'E':
1537 case 'f':
1538 case 'F':
1539 case 'g':
1540 case 'G':
1541 case 'n':
1542 /* no conversion, already a complex. do the formatting */
Victor Stinnerd3f08822012-05-29 12:57:52 +02001543 return format_complex_internal(obj, &format, writer);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001544
1545 default:
1546 /* unknown */
1547 unknown_presentation_type(format.type, obj->ob_type->tp_name);
Victor Stinnerd3f08822012-05-29 12:57:52 +02001548 return -1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001549 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001550}