blob: 9192bfd6a670fbc30fa7cfa68551f58ad3e7b3fa [file] [log] [blame]
/* implements the unicode (as opposed to string) version of the
   built-in formatters for string, int, float.  that is, the versions
   of int.__format__, etc., that take and return unicode objects */
4
5#include "Python.h"
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02006#include <locale.h>
7
8/* Raises an exception about an unknown presentation type for this
9 * type. */
10
11static void
12unknown_presentation_type(Py_UCS4 presentation_type,
13 const char* type_name)
14{
15 /* %c might be out-of-range, hence the two cases. */
16 if (presentation_type > 32 && presentation_type < 128)
17 PyErr_Format(PyExc_ValueError,
18 "Unknown format code '%c' "
19 "for object of type '%.200s'",
20 (char)presentation_type,
21 type_name);
22 else
23 PyErr_Format(PyExc_ValueError,
24 "Unknown format code '\\x%x' "
25 "for object of type '%.200s'",
26 (unsigned int)presentation_type,
27 type_name);
28}
29
30static void
31invalid_comma_type(Py_UCS4 presentation_type)
32{
33 if (presentation_type > 32 && presentation_type < 128)
34 PyErr_Format(PyExc_ValueError,
Eric V. Smith89e1b1a2016-09-09 23:06:47 -040035 "Cannot specify ',' or '_' with '%c'.",
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020036 (char)presentation_type);
37 else
38 PyErr_Format(PyExc_ValueError,
Eric V. Smith89e1b1a2016-09-09 23:06:47 -040039 "Cannot specify ',' or '_' with '\\x%x'.",
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020040 (unsigned int)presentation_type);
41}
42
Eric V. Smith89e1b1a2016-09-09 23:06:47 -040043static void
Benjamin Petersoneb0dfa92016-09-09 20:14:05 -070044invalid_comma_and_underscore(void)
Eric V. Smith89e1b1a2016-09-09 23:06:47 -040045{
46 PyErr_Format(PyExc_ValueError, "Cannot specify both ',' and '_'.");
47}
48
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020049/*
50 get_integer consumes 0 or more decimal digit characters from an
51 input string, updates *result with the corresponding positive
52 integer, and returns the number of digits consumed.
53
54 returns -1 on error.
55*/
56static int
Serhiy Storchaka1f932612016-08-29 15:57:26 +030057get_integer(PyObject *str, Py_ssize_t *ppos, Py_ssize_t end,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020058 Py_ssize_t *result)
59{
Serhiy Storchaka1f932612016-08-29 15:57:26 +030060 Py_ssize_t accumulator, digitval, pos = *ppos;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020061 int numdigits;
Serhiy Storchaka1f932612016-08-29 15:57:26 +030062 int kind = PyUnicode_KIND(str);
63 void *data = PyUnicode_DATA(str);
64
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020065 accumulator = numdigits = 0;
Serhiy Storchaka1f932612016-08-29 15:57:26 +030066 for (; pos < end; pos++, numdigits++) {
67 digitval = Py_UNICODE_TODECIMAL(PyUnicode_READ(kind, data, pos));
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020068 if (digitval < 0)
69 break;
70 /*
Mark Dickinson47862d42011-12-01 15:27:04 +000071 Detect possible overflow before it happens:
72
73 accumulator * 10 + digitval > PY_SSIZE_T_MAX if and only if
74 accumulator > (PY_SSIZE_T_MAX - digitval) / 10.
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020075 */
Mark Dickinson47862d42011-12-01 15:27:04 +000076 if (accumulator > (PY_SSIZE_T_MAX - digitval) / 10) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020077 PyErr_Format(PyExc_ValueError,
78 "Too many decimal digits in format string");
Serhiy Storchaka1f932612016-08-29 15:57:26 +030079 *ppos = pos;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020080 return -1;
81 }
Mark Dickinson47862d42011-12-01 15:27:04 +000082 accumulator = accumulator * 10 + digitval;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020083 }
Serhiy Storchaka1f932612016-08-29 15:57:26 +030084 *ppos = pos;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020085 *result = accumulator;
86 return numdigits;
87}
88
89/************************************************************************/
90/*********** standard format specifier parsing **************************/
91/************************************************************************/
92
93/* returns true if this character is a specifier alignment token */
94Py_LOCAL_INLINE(int)
95is_alignment_token(Py_UCS4 c)
96{
97 switch (c) {
98 case '<': case '>': case '=': case '^':
99 return 1;
100 default:
101 return 0;
102 }
103}
104
105/* returns true if this character is a sign element */
106Py_LOCAL_INLINE(int)
107is_sign_element(Py_UCS4 c)
108{
109 switch (c) {
110 case ' ': case '+': case '-':
111 return 1;
112 default:
113 return 0;
114 }
115}
Eric Smith8c663262007-08-25 02:26:07 +0000116
/* Locale type codes.  LT_NO_LOCALE must be zero, because the parsed
   thousands_separators member is also tested for plain truthiness. */
enum LocaleType {
    LT_NO_LOCALE = 0,          /* no thousands separator */
    LT_DEFAULT_LOCALE = 1,     /* ',' separator, groups of three */
    LT_UNDERSCORE_LOCALE = 2,  /* '_' separator, groups of three */
    LT_UNDER_FOUR_LOCALE = 3,  /* '_' separator, groups of four */
    LT_CURRENT_LOCALE = 4      /* separators taken from localeconv() */
};
Eric Smith4a7d76d2008-05-30 18:10:19 +0000125
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200126typedef struct {
127 Py_UCS4 fill_char;
128 Py_UCS4 align;
129 int alternate;
130 Py_UCS4 sign;
131 Py_ssize_t width;
Benjamin Peterson995026a2016-09-13 22:46:15 -0700132 enum LocaleType thousands_separators;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200133 Py_ssize_t precision;
134 Py_UCS4 type;
135} InternalFormatSpec;
Eric Smith4a7d76d2008-05-30 18:10:19 +0000136
#if 0
/* Occasionally useful for debugging.  Normally compiled out by the
   enclosing #if 0.  Prints every member of *format to stdout. */
static void
DEBUG_PRINT_FORMAT_SPEC(InternalFormatSpec *format)
{
    /* The Py_UCS4 and enum members are cast explicitly so that every
       argument has exactly the int type its %d / %c conversion
       expects. */
    printf("internal format spec: fill_char %d\n", (int)format->fill_char);
    printf("internal format spec: align %d\n", (int)format->align);
    printf("internal format spec: alternate %d\n", format->alternate);
    printf("internal format spec: sign %d\n", (int)format->sign);
    printf("internal format spec: width %zd\n", format->width);
    printf("internal format spec: thousands_separators %d\n",
           (int)format->thousands_separators);
    printf("internal format spec: precision %zd\n", format->precision);
    printf("internal format spec: type %c\n", (int)format->type);
    printf("\n");
}
#endif
154
155
156/*
157 ptr points to the start of the format_spec, end points just past its end.
158 fills in format with the parsed information.
159 returns 1 on success, 0 on failure.
160 if failure, sets the exception
161*/
162static int
163parse_internal_render_format_spec(PyObject *format_spec,
164 Py_ssize_t start, Py_ssize_t end,
165 InternalFormatSpec *format,
166 char default_type,
167 char default_align)
168{
169 Py_ssize_t pos = start;
Serhiy Storchaka1f932612016-08-29 15:57:26 +0300170 int kind = PyUnicode_KIND(format_spec);
171 void *data = PyUnicode_DATA(format_spec);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200172 /* end-pos is used throughout this code to specify the length of
173 the input string */
Serhiy Storchaka1f932612016-08-29 15:57:26 +0300174#define READ_spec(index) PyUnicode_READ(kind, data, index)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200175
176 Py_ssize_t consumed;
177 int align_specified = 0;
Eric V. Smith2ea97122014-04-14 11:55:10 -0400178 int fill_char_specified = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200179
Eric V. Smith2ea97122014-04-14 11:55:10 -0400180 format->fill_char = ' ';
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200181 format->align = default_align;
182 format->alternate = 0;
183 format->sign = '\0';
184 format->width = -1;
Benjamin Peterson995026a2016-09-13 22:46:15 -0700185 format->thousands_separators = LT_NO_LOCALE;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200186 format->precision = -1;
187 format->type = default_type;
188
189 /* If the second char is an alignment token,
190 then parse the fill char */
191 if (end-pos >= 2 && is_alignment_token(READ_spec(pos+1))) {
192 format->align = READ_spec(pos+1);
193 format->fill_char = READ_spec(pos);
Eric V. Smith2ea97122014-04-14 11:55:10 -0400194 fill_char_specified = 1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200195 align_specified = 1;
196 pos += 2;
197 }
198 else if (end-pos >= 1 && is_alignment_token(READ_spec(pos))) {
199 format->align = READ_spec(pos);
200 align_specified = 1;
201 ++pos;
202 }
203
204 /* Parse the various sign options */
205 if (end-pos >= 1 && is_sign_element(READ_spec(pos))) {
206 format->sign = READ_spec(pos);
207 ++pos;
208 }
209
210 /* If the next character is #, we're in alternate mode. This only
211 applies to integers. */
212 if (end-pos >= 1 && READ_spec(pos) == '#') {
213 format->alternate = 1;
214 ++pos;
215 }
216
217 /* The special case for 0-padding (backwards compat) */
Eric V. Smith2ea97122014-04-14 11:55:10 -0400218 if (!fill_char_specified && end-pos >= 1 && READ_spec(pos) == '0') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200219 format->fill_char = '0';
220 if (!align_specified) {
221 format->align = '=';
222 }
223 ++pos;
224 }
225
226 consumed = get_integer(format_spec, &pos, end, &format->width);
227 if (consumed == -1)
228 /* Overflow error. Exception already set. */
229 return 0;
230
231 /* If consumed is 0, we didn't consume any characters for the
232 width. In that case, reset the width to -1, because
233 get_integer() will have set it to zero. -1 is how we record
234 that the width wasn't specified. */
235 if (consumed == 0)
236 format->width = -1;
237
238 /* Comma signifies add thousands separators */
239 if (end-pos && READ_spec(pos) == ',') {
Eric V. Smith89e1b1a2016-09-09 23:06:47 -0400240 format->thousands_separators = LT_DEFAULT_LOCALE;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200241 ++pos;
242 }
Eric V. Smith89e1b1a2016-09-09 23:06:47 -0400243 /* Underscore signifies add thousands separators */
244 if (end-pos && READ_spec(pos) == '_') {
Benjamin Peterson995026a2016-09-13 22:46:15 -0700245 if (format->thousands_separators != LT_NO_LOCALE) {
Eric V. Smith89e1b1a2016-09-09 23:06:47 -0400246 invalid_comma_and_underscore();
247 return 0;
248 }
249 format->thousands_separators = LT_UNDERSCORE_LOCALE;
250 ++pos;
251 }
252 if (end-pos && READ_spec(pos) == ',') {
253 invalid_comma_and_underscore();
254 return 0;
255 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200256
257 /* Parse field precision */
258 if (end-pos && READ_spec(pos) == '.') {
259 ++pos;
260
261 consumed = get_integer(format_spec, &pos, end, &format->precision);
262 if (consumed == -1)
263 /* Overflow error. Exception already set. */
264 return 0;
265
266 /* Not having a precision after a dot is an error. */
267 if (consumed == 0) {
268 PyErr_Format(PyExc_ValueError,
269 "Format specifier missing precision");
270 return 0;
271 }
272
273 }
274
275 /* Finally, parse the type field. */
276
277 if (end-pos > 1) {
Eric V. Smithd25cfe62012-01-19 20:04:28 -0500278 /* More than one char remain, invalid format specifier. */
279 PyErr_Format(PyExc_ValueError, "Invalid format specifier");
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200280 return 0;
281 }
282
283 if (end-pos == 1) {
284 format->type = READ_spec(pos);
285 ++pos;
286 }
287
288 /* Do as much validating as we can, just by looking at the format
289 specifier. Do not take into account what type of formatting
290 we're doing (int, float, string). */
291
292 if (format->thousands_separators) {
293 switch (format->type) {
294 case 'd':
295 case 'e':
296 case 'f':
297 case 'g':
298 case 'E':
299 case 'G':
300 case '%':
301 case 'F':
302 case '\0':
303 /* These are allowed. See PEP 378.*/
304 break;
Eric V. Smith89e1b1a2016-09-09 23:06:47 -0400305 case 'b':
306 case 'o':
307 case 'x':
308 case 'X':
309 /* Underscores are allowed in bin/oct/hex. See PEP 515. */
310 if (format->thousands_separators == LT_UNDERSCORE_LOCALE) {
311 /* Every four digits, not every three, in bin/oct/hex. */
312 format->thousands_separators = LT_UNDER_FOUR_LOCALE;
313 break;
314 }
Victor Stinnerc0e77362017-09-12 16:09:44 -0700315 /* fall through */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200316 default:
317 invalid_comma_type(format->type);
318 return 0;
319 }
320 }
321
Victor Stinnera4ac6002012-01-21 15:50:49 +0100322 assert (format->align <= 127);
323 assert (format->sign <= 127);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200324 return 1;
325}
326
327/* Calculate the padding needed. */
328static void
329calc_padding(Py_ssize_t nchars, Py_ssize_t width, Py_UCS4 align,
330 Py_ssize_t *n_lpadding, Py_ssize_t *n_rpadding,
331 Py_ssize_t *n_total)
332{
333 if (width >= 0) {
334 if (nchars > width)
335 *n_total = nchars;
336 else
337 *n_total = width;
338 }
339 else {
340 /* not specified, use all of the chars and no more */
341 *n_total = nchars;
342 }
343
344 /* Figure out how much leading space we need, based on the
345 aligning */
346 if (align == '>')
347 *n_lpadding = *n_total - nchars;
348 else if (align == '^')
349 *n_lpadding = (*n_total - nchars) / 2;
350 else if (align == '<' || align == '=')
351 *n_lpadding = 0;
352 else {
353 /* We should never have an unspecified alignment. */
354 *n_lpadding = 0;
355 assert(0);
356 }
357
358 *n_rpadding = *n_total - nchars - *n_lpadding;
359}
360
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200361/* Do the padding, and return a pointer to where the caller-supplied
362 content goes. */
Victor Stinner9ce59bb2013-05-17 00:04:56 +0200363static int
Victor Stinnerd3f08822012-05-29 12:57:52 +0200364fill_padding(_PyUnicodeWriter *writer,
365 Py_ssize_t nchars,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200366 Py_UCS4 fill_char, Py_ssize_t n_lpadding,
367 Py_ssize_t n_rpadding)
368{
Victor Stinnerd3f08822012-05-29 12:57:52 +0200369 Py_ssize_t pos;
370
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200371 /* Pad on left. */
Victor Stinnerd3f08822012-05-29 12:57:52 +0200372 if (n_lpadding) {
373 pos = writer->pos;
374 _PyUnicode_FastFill(writer->buffer, pos, n_lpadding, fill_char);
375 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200376
377 /* Pad on right. */
Victor Stinnerd3f08822012-05-29 12:57:52 +0200378 if (n_rpadding) {
379 pos = writer->pos + nchars + n_lpadding;
380 _PyUnicode_FastFill(writer->buffer, pos, n_rpadding, fill_char);
381 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200382
383 /* Pointer to the user content. */
Victor Stinnerd3f08822012-05-29 12:57:52 +0200384 writer->pos += n_lpadding;
385 return 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200386}
387
388/************************************************************************/
389/*********** common routines for numeric formatting *********************/
390/************************************************************************/
391
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200392/* Locale info needed for formatting integers and the part of floats
393 before and including the decimal. Note that locales only support
394 8-bit chars, not unicode. */
395typedef struct {
Victor Stinner41a863c2012-02-24 00:37:51 +0100396 PyObject *decimal_point;
397 PyObject *thousands_sep;
398 const char *grouping;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200399} LocaleInfo;
400
Victor Stinner41a863c2012-02-24 00:37:51 +0100401#define STATIC_LOCALE_INFO_INIT {0, 0, 0}
402
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200403/* describes the layout for an integer, see the comment in
404 calc_number_widths() for details */
405typedef struct {
406 Py_ssize_t n_lpadding;
407 Py_ssize_t n_prefix;
408 Py_ssize_t n_spadding;
409 Py_ssize_t n_rpadding;
410 char sign;
411 Py_ssize_t n_sign; /* number of digits needed for sign (0/1) */
412 Py_ssize_t n_grouped_digits; /* Space taken up by the digits, including
413 any grouping chars. */
414 Py_ssize_t n_decimal; /* 0 if only an integer */
415 Py_ssize_t n_remainder; /* Digits in decimal and/or exponent part,
416 excluding the decimal itself, if
417 present. */
418
419 /* These 2 are not the widths of fields, but are needed by
420 STRINGLIB_GROUPING. */
421 Py_ssize_t n_digits; /* The number of digits before a decimal
422 or exponent. */
423 Py_ssize_t n_min_width; /* The min_width we used when we computed
424 the n_grouped_digits width. */
425} NumberFieldWidths;
426
427
428/* Given a number of the form:
429 digits[remainder]
430 where ptr points to the start and end points to the end, find where
431 the integer part ends. This could be a decimal, an exponent, both,
432 or neither.
433 If a decimal point is present, set *has_decimal and increment
434 remainder beyond it.
435 Results are undefined (but shouldn't crash) for improperly
436 formatted strings.
437*/
438static void
439parse_number(PyObject *s, Py_ssize_t pos, Py_ssize_t end,
440 Py_ssize_t *n_remainder, int *has_decimal)
441{
442 Py_ssize_t remainder;
Serhiy Storchaka1f932612016-08-29 15:57:26 +0300443 int kind = PyUnicode_KIND(s);
444 void *data = PyUnicode_DATA(s);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200445
Serhiy Storchaka1f932612016-08-29 15:57:26 +0300446 while (pos<end && Py_ISDIGIT(PyUnicode_READ(kind, data, pos)))
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200447 ++pos;
448 remainder = pos;
449
450 /* Does remainder start with a decimal point? */
Serhiy Storchaka1f932612016-08-29 15:57:26 +0300451 *has_decimal = pos<end && PyUnicode_READ(kind, data, remainder) == '.';
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200452
453 /* Skip the decimal point. */
454 if (*has_decimal)
455 remainder++;
456
457 *n_remainder = end - remainder;
458}
459
460/* not all fields of format are used. for example, precision is
461 unused. should this take discrete params in order to be more clear
462 about what it does? or is passing a single format parameter easier
463 and more efficient enough to justify a little obfuscation? */
464static Py_ssize_t
465calc_number_widths(NumberFieldWidths *spec, Py_ssize_t n_prefix,
466 Py_UCS4 sign_char, PyObject *number, Py_ssize_t n_start,
467 Py_ssize_t n_end, Py_ssize_t n_remainder,
468 int has_decimal, const LocaleInfo *locale,
Victor Stinner41a863c2012-02-24 00:37:51 +0100469 const InternalFormatSpec *format, Py_UCS4 *maxchar)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200470{
471 Py_ssize_t n_non_digit_non_padding;
472 Py_ssize_t n_padding;
473
474 spec->n_digits = n_end - n_start - n_remainder - (has_decimal?1:0);
475 spec->n_lpadding = 0;
476 spec->n_prefix = n_prefix;
Victor Stinner41a863c2012-02-24 00:37:51 +0100477 spec->n_decimal = has_decimal ? PyUnicode_GET_LENGTH(locale->decimal_point) : 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200478 spec->n_remainder = n_remainder;
479 spec->n_spadding = 0;
480 spec->n_rpadding = 0;
481 spec->sign = '\0';
482 spec->n_sign = 0;
483
484 /* the output will look like:
485 | |
486 | <lpadding> <sign> <prefix> <spadding> <grouped_digits> <decimal> <remainder> <rpadding> |
487 | |
488
489 sign is computed from format->sign and the actual
490 sign of the number
491
492 prefix is given (it's for the '0x' prefix)
493
494 digits is already known
495
496 the total width is either given, or computed from the
497 actual digits
498
499 only one of lpadding, spadding, and rpadding can be non-zero,
500 and it's calculated from the width and other fields
501 */
502
503 /* compute the various parts we're going to write */
504 switch (format->sign) {
505 case '+':
506 /* always put a + or - */
507 spec->n_sign = 1;
508 spec->sign = (sign_char == '-' ? '-' : '+');
509 break;
510 case ' ':
511 spec->n_sign = 1;
512 spec->sign = (sign_char == '-' ? '-' : ' ');
513 break;
514 default:
515 /* Not specified, or the default (-) */
516 if (sign_char == '-') {
517 spec->n_sign = 1;
518 spec->sign = '-';
519 }
520 }
521
522 /* The number of chars used for non-digits and non-padding. */
523 n_non_digit_non_padding = spec->n_sign + spec->n_prefix + spec->n_decimal +
524 spec->n_remainder;
525
526 /* min_width can go negative, that's okay. format->width == -1 means
527 we don't care. */
528 if (format->fill_char == '0' && format->align == '=')
529 spec->n_min_width = format->width - n_non_digit_non_padding;
530 else
531 spec->n_min_width = 0;
532
533 if (spec->n_digits == 0)
534 /* This case only occurs when using 'c' formatting, we need
535 to special case it because the grouping code always wants
536 to have at least one character. */
537 spec->n_grouped_digits = 0;
Victor Stinner41a863c2012-02-24 00:37:51 +0100538 else {
539 Py_UCS4 grouping_maxchar;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200540 spec->n_grouped_digits = _PyUnicode_InsertThousandsGrouping(
Victor Stinner41a863c2012-02-24 00:37:51 +0100541 NULL, 0,
542 0, NULL,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200543 spec->n_digits, spec->n_min_width,
Victor Stinner41a863c2012-02-24 00:37:51 +0100544 locale->grouping, locale->thousands_sep, &grouping_maxchar);
545 *maxchar = Py_MAX(*maxchar, grouping_maxchar);
546 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200547
548 /* Given the desired width and the total of digit and non-digit
549 space we consume, see if we need any padding. format->width can
550 be negative (meaning no padding), but this code still works in
551 that case. */
552 n_padding = format->width -
553 (n_non_digit_non_padding + spec->n_grouped_digits);
554 if (n_padding > 0) {
555 /* Some padding is needed. Determine if it's left, space, or right. */
556 switch (format->align) {
557 case '<':
558 spec->n_rpadding = n_padding;
559 break;
560 case '^':
561 spec->n_lpadding = n_padding / 2;
562 spec->n_rpadding = n_padding - spec->n_lpadding;
563 break;
564 case '=':
565 spec->n_spadding = n_padding;
566 break;
567 case '>':
568 spec->n_lpadding = n_padding;
569 break;
570 default:
571 /* Shouldn't get here, but treat it as '>' */
572 spec->n_lpadding = n_padding;
573 assert(0);
574 break;
575 }
576 }
Victor Stinner41a863c2012-02-24 00:37:51 +0100577
578 if (spec->n_lpadding || spec->n_spadding || spec->n_rpadding)
579 *maxchar = Py_MAX(*maxchar, format->fill_char);
580
Victor Stinner90f50d42012-02-24 01:44:47 +0100581 if (spec->n_decimal)
582 *maxchar = Py_MAX(*maxchar, PyUnicode_MAX_CHAR_VALUE(locale->decimal_point));
583
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200584 return spec->n_lpadding + spec->n_sign + spec->n_prefix +
585 spec->n_spadding + spec->n_grouped_digits + spec->n_decimal +
586 spec->n_remainder + spec->n_rpadding;
587}
588
589/* Fill in the digit parts of a numbers's string representation,
590 as determined in calc_number_widths().
Victor Stinnerafbaa202011-09-28 21:50:16 +0200591 Return -1 on error, or 0 on success. */
592static int
Victor Stinnerd3f08822012-05-29 12:57:52 +0200593fill_number(_PyUnicodeWriter *writer, const NumberFieldWidths *spec,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200594 PyObject *digits, Py_ssize_t d_start, Py_ssize_t d_end,
Victor Stinnerafbaa202011-09-28 21:50:16 +0200595 PyObject *prefix, Py_ssize_t p_start,
596 Py_UCS4 fill_char,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200597 LocaleInfo *locale, int toupper)
598{
599 /* Used to keep track of digits, decimal, and remainder. */
600 Py_ssize_t d_pos = d_start;
Victor Stinner22c103b2013-05-07 23:50:03 +0200601 const unsigned int kind = writer->kind;
Victor Stinnerd3f08822012-05-29 12:57:52 +0200602 const void *data = writer->data;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200603 Py_ssize_t r;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200604
605 if (spec->n_lpadding) {
Victor Stinnerd3f08822012-05-29 12:57:52 +0200606 _PyUnicode_FastFill(writer->buffer,
607 writer->pos, spec->n_lpadding, fill_char);
608 writer->pos += spec->n_lpadding;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200609 }
610 if (spec->n_sign == 1) {
Victor Stinnerd3f08822012-05-29 12:57:52 +0200611 PyUnicode_WRITE(kind, data, writer->pos, spec->sign);
612 writer->pos++;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200613 }
614 if (spec->n_prefix) {
Victor Stinnerd3f08822012-05-29 12:57:52 +0200615 _PyUnicode_FastCopyCharacters(writer->buffer, writer->pos,
616 prefix, p_start,
617 spec->n_prefix);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200618 if (toupper) {
619 Py_ssize_t t;
Benjamin Peterson21e0da22012-01-11 21:00:42 -0500620 for (t = 0; t < spec->n_prefix; t++) {
Victor Stinnerd3f08822012-05-29 12:57:52 +0200621 Py_UCS4 c = PyUnicode_READ(kind, data, writer->pos + t);
Victor Stinnered277852012-02-01 00:22:23 +0100622 c = Py_TOUPPER(c);
Victor Stinnera4ac6002012-01-21 15:50:49 +0100623 assert (c <= 127);
Victor Stinnerd3f08822012-05-29 12:57:52 +0200624 PyUnicode_WRITE(kind, data, writer->pos + t, c);
Benjamin Peterson21e0da22012-01-11 21:00:42 -0500625 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200626 }
Victor Stinnerd3f08822012-05-29 12:57:52 +0200627 writer->pos += spec->n_prefix;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200628 }
629 if (spec->n_spadding) {
Victor Stinnerd3f08822012-05-29 12:57:52 +0200630 _PyUnicode_FastFill(writer->buffer,
631 writer->pos, spec->n_spadding, fill_char);
632 writer->pos += spec->n_spadding;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200633 }
634
635 /* Only for type 'c' special case, it has no digits. */
636 if (spec->n_digits != 0) {
637 /* Fill the digits with InsertThousandsGrouping. */
Victor Stinnerdba2dee2011-09-28 21:50:42 +0200638 char *pdigits;
639 if (PyUnicode_READY(digits))
640 return -1;
641 pdigits = PyUnicode_DATA(digits);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200642 if (PyUnicode_KIND(digits) < kind) {
643 pdigits = _PyUnicode_AsKind(digits, kind);
Victor Stinnerafbaa202011-09-28 21:50:16 +0200644 if (pdigits == NULL)
645 return -1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200646 }
Victor Stinner90f50d42012-02-24 01:44:47 +0100647 r = _PyUnicode_InsertThousandsGrouping(
Victor Stinnerd3f08822012-05-29 12:57:52 +0200648 writer->buffer, writer->pos,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200649 spec->n_grouped_digits,
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200650 pdigits + kind * d_pos,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200651 spec->n_digits, spec->n_min_width,
Victor Stinner41a863c2012-02-24 00:37:51 +0100652 locale->grouping, locale->thousands_sep, NULL);
Victor Stinner90f50d42012-02-24 01:44:47 +0100653 if (r == -1)
654 return -1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200655 assert(r == spec->n_grouped_digits);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200656 if (PyUnicode_KIND(digits) < kind)
657 PyMem_Free(pdigits);
658 d_pos += spec->n_digits;
659 }
660 if (toupper) {
661 Py_ssize_t t;
Benjamin Peterson21e0da22012-01-11 21:00:42 -0500662 for (t = 0; t < spec->n_grouped_digits; t++) {
Victor Stinnerd3f08822012-05-29 12:57:52 +0200663 Py_UCS4 c = PyUnicode_READ(kind, data, writer->pos + t);
Victor Stinnered277852012-02-01 00:22:23 +0100664 c = Py_TOUPPER(c);
Benjamin Peterson21e0da22012-01-11 21:00:42 -0500665 if (c > 127) {
666 PyErr_SetString(PyExc_SystemError, "non-ascii grouped digit");
667 return -1;
668 }
Victor Stinnerd3f08822012-05-29 12:57:52 +0200669 PyUnicode_WRITE(kind, data, writer->pos + t, c);
Benjamin Peterson21e0da22012-01-11 21:00:42 -0500670 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200671 }
Victor Stinnerd3f08822012-05-29 12:57:52 +0200672 writer->pos += spec->n_grouped_digits;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200673
674 if (spec->n_decimal) {
Victor Stinnerd3f08822012-05-29 12:57:52 +0200675 _PyUnicode_FastCopyCharacters(
676 writer->buffer, writer->pos,
677 locale->decimal_point, 0, spec->n_decimal);
678 writer->pos += spec->n_decimal;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200679 d_pos += 1;
680 }
681
682 if (spec->n_remainder) {
Victor Stinnerd3f08822012-05-29 12:57:52 +0200683 _PyUnicode_FastCopyCharacters(
684 writer->buffer, writer->pos,
685 digits, d_pos, spec->n_remainder);
686 writer->pos += spec->n_remainder;
Brett Cannon8a250fa2012-06-25 16:13:44 -0400687 /* d_pos += spec->n_remainder; */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200688 }
689
690 if (spec->n_rpadding) {
Victor Stinnerd3f08822012-05-29 12:57:52 +0200691 _PyUnicode_FastFill(writer->buffer,
692 writer->pos, spec->n_rpadding,
693 fill_char);
694 writer->pos += spec->n_rpadding;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200695 }
Victor Stinnerafbaa202011-09-28 21:50:16 +0200696 return 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200697}
698
Serhiy Storchaka2d06e842015-12-25 19:53:18 +0200699static const char no_grouping[1] = {CHAR_MAX};
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200700
701/* Find the decimal point character(s?), thousands_separator(s?), and
702 grouping description, either for the current locale if type is
Eric V. Smith89e1b1a2016-09-09 23:06:47 -0400703 LT_CURRENT_LOCALE, a hard-coded locale if LT_DEFAULT_LOCALE or
704 LT_UNDERSCORE_LOCALE/LT_UNDER_FOUR_LOCALE, or none if LT_NO_LOCALE. */
Victor Stinner41a863c2012-02-24 00:37:51 +0100705static int
Benjamin Peterson59e5e0d2016-09-13 22:43:45 -0700706get_locale_info(enum LocaleType type, LocaleInfo *locale_info)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200707{
708 switch (type) {
709 case LT_CURRENT_LOCALE: {
710 struct lconv *locale_data = localeconv();
Victor Stinner41a863c2012-02-24 00:37:51 +0100711 locale_info->decimal_point = PyUnicode_DecodeLocale(
712 locale_data->decimal_point,
713 NULL);
714 if (locale_info->decimal_point == NULL)
715 return -1;
716 locale_info->thousands_sep = PyUnicode_DecodeLocale(
717 locale_data->thousands_sep,
718 NULL);
Benjamin Peterson59e5e0d2016-09-13 22:43:45 -0700719 if (locale_info->thousands_sep == NULL)
Victor Stinner41a863c2012-02-24 00:37:51 +0100720 return -1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200721 locale_info->grouping = locale_data->grouping;
722 break;
723 }
724 case LT_DEFAULT_LOCALE:
Eric V. Smith89e1b1a2016-09-09 23:06:47 -0400725 case LT_UNDERSCORE_LOCALE:
726 case LT_UNDER_FOUR_LOCALE:
Victor Stinner41a863c2012-02-24 00:37:51 +0100727 locale_info->decimal_point = PyUnicode_FromOrdinal('.');
Eric V. Smith89e1b1a2016-09-09 23:06:47 -0400728 locale_info->thousands_sep = PyUnicode_FromOrdinal(
729 type == LT_DEFAULT_LOCALE ? ',' : '_');
Benjamin Peterson59e5e0d2016-09-13 22:43:45 -0700730 if (!locale_info->decimal_point || !locale_info->thousands_sep)
Victor Stinner41a863c2012-02-24 00:37:51 +0100731 return -1;
Eric V. Smith89e1b1a2016-09-09 23:06:47 -0400732 if (type != LT_UNDER_FOUR_LOCALE)
733 locale_info->grouping = "\3"; /* Group every 3 characters. The
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200734 (implicit) trailing 0 means repeat
735 infinitely. */
Eric V. Smith89e1b1a2016-09-09 23:06:47 -0400736 else
737 locale_info->grouping = "\4"; /* Bin/oct/hex group every four. */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200738 break;
739 case LT_NO_LOCALE:
Victor Stinner41a863c2012-02-24 00:37:51 +0100740 locale_info->decimal_point = PyUnicode_FromOrdinal('.');
741 locale_info->thousands_sep = PyUnicode_New(0, 0);
Benjamin Peterson59e5e0d2016-09-13 22:43:45 -0700742 if (!locale_info->decimal_point || !locale_info->thousands_sep)
Victor Stinner41a863c2012-02-24 00:37:51 +0100743 return -1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200744 locale_info->grouping = no_grouping;
745 break;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200746 }
Victor Stinner41a863c2012-02-24 00:37:51 +0100747 return 0;
748}
749
750static void
751free_locale_info(LocaleInfo *locale_info)
752{
753 Py_XDECREF(locale_info->decimal_point);
754 Py_XDECREF(locale_info->thousands_sep);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200755}
756
757/************************************************************************/
758/*********** string formatting ******************************************/
759/************************************************************************/
760
Victor Stinnerd3f08822012-05-29 12:57:52 +0200761static int
762format_string_internal(PyObject *value, const InternalFormatSpec *format,
763 _PyUnicodeWriter *writer)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200764{
765 Py_ssize_t lpad;
766 Py_ssize_t rpad;
767 Py_ssize_t total;
Victor Stinnerd3f08822012-05-29 12:57:52 +0200768 Py_ssize_t len;
769 int result = -1;
Victor Stinnerece58de2012-04-23 23:36:38 +0200770 Py_UCS4 maxchar;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200771
Victor Stinnerd3f08822012-05-29 12:57:52 +0200772 assert(PyUnicode_IS_READY(value));
773 len = PyUnicode_GET_LENGTH(value);
774
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200775 /* sign is not allowed on strings */
776 if (format->sign != '\0') {
777 PyErr_SetString(PyExc_ValueError,
778 "Sign not allowed in string format specifier");
779 goto done;
780 }
781
782 /* alternate is not allowed on strings */
783 if (format->alternate) {
784 PyErr_SetString(PyExc_ValueError,
785 "Alternate form (#) not allowed in string format "
786 "specifier");
787 goto done;
788 }
789
790 /* '=' alignment not allowed on strings */
791 if (format->align == '=') {
792 PyErr_SetString(PyExc_ValueError,
793 "'=' alignment not allowed "
794 "in string format specifier");
795 goto done;
796 }
797
Victor Stinner621ef3d2012-10-02 00:33:47 +0200798 if ((format->width == -1 || format->width <= len)
799 && (format->precision == -1 || format->precision >= len)) {
Victor Stinnerd3f08822012-05-29 12:57:52 +0200800 /* Fast path */
801 return _PyUnicodeWriter_WriteStr(writer, value);
802 }
803
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200804 /* if precision is specified, output no more that format.precision
805 characters */
806 if (format->precision >= 0 && len >= format->precision) {
807 len = format->precision;
808 }
809
810 calc_padding(len, format->width, format->align, &lpad, &rpad, &total);
811
Victor Stinnereb4b5ac2013-04-03 02:02:33 +0200812 maxchar = writer->maxchar;
Victor Stinnera4ac6002012-01-21 15:50:49 +0100813 if (lpad != 0 || rpad != 0)
814 maxchar = Py_MAX(maxchar, format->fill_char);
Victor Stinnereb4b5ac2013-04-03 02:02:33 +0200815 if (PyUnicode_MAX_CHAR_VALUE(value) > maxchar) {
816 Py_UCS4 valmaxchar = _PyUnicode_FindMaxChar(value, 0, len);
817 maxchar = Py_MAX(maxchar, valmaxchar);
818 }
Victor Stinnera4ac6002012-01-21 15:50:49 +0100819
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200820 /* allocate the resulting string */
Victor Stinnerd3f08822012-05-29 12:57:52 +0200821 if (_PyUnicodeWriter_Prepare(writer, total, maxchar) == -1)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200822 goto done;
823
824 /* Write into that space. First the padding. */
Eric V. Smith2ea97122014-04-14 11:55:10 -0400825 result = fill_padding(writer, len, format->fill_char, lpad, rpad);
Victor Stinnerd3f08822012-05-29 12:57:52 +0200826 if (result == -1)
827 goto done;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200828
829 /* Then the source string. */
Victor Stinnerc9d369f2012-06-16 02:22:37 +0200830 if (len) {
831 _PyUnicode_FastCopyCharacters(writer->buffer, writer->pos,
832 value, 0, len);
833 }
Victor Stinnerd3f08822012-05-29 12:57:52 +0200834 writer->pos += (len + rpad);
835 result = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200836
837done:
838 return result;
839}
840
841
842/************************************************************************/
843/*********** long formatting ********************************************/
844/************************************************************************/
845
Victor Stinnerd3f08822012-05-29 12:57:52 +0200846static int
847format_long_internal(PyObject *value, const InternalFormatSpec *format,
848 _PyUnicodeWriter *writer)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200849{
Victor Stinnerd3f08822012-05-29 12:57:52 +0200850 int result = -1;
Amaury Forgeot d'Arccd27df32012-01-23 22:42:19 +0100851 Py_UCS4 maxchar = 127;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200852 PyObject *tmp = NULL;
853 Py_ssize_t inumeric_chars;
854 Py_UCS4 sign_char = '\0';
855 Py_ssize_t n_digits; /* count of digits need from the computed
856 string */
857 Py_ssize_t n_remainder = 0; /* Used only for 'c' formatting, which
858 produces non-digits */
859 Py_ssize_t n_prefix = 0; /* Count of prefix chars, (e.g., '0x') */
860 Py_ssize_t n_total;
Victor Stinnered277852012-02-01 00:22:23 +0100861 Py_ssize_t prefix = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200862 NumberFieldWidths spec;
863 long x;
864
865 /* Locale settings, either from the actual locale or
866 from a hard-code pseudo-locale */
Victor Stinner41a863c2012-02-24 00:37:51 +0100867 LocaleInfo locale = STATIC_LOCALE_INFO_INIT;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200868
869 /* no precision allowed on integers */
870 if (format->precision != -1) {
871 PyErr_SetString(PyExc_ValueError,
872 "Precision not allowed in integer format specifier");
873 goto done;
874 }
875
876 /* special case for character formatting */
877 if (format->type == 'c') {
878 /* error to specify a sign */
879 if (format->sign != '\0') {
880 PyErr_SetString(PyExc_ValueError,
881 "Sign not allowed with integer"
882 " format specifier 'c'");
883 goto done;
884 }
Eric V. Smitha12572f2014-04-15 22:37:55 -0400885 /* error to request alternate format */
886 if (format->alternate) {
887 PyErr_SetString(PyExc_ValueError,
888 "Alternate form (#) not allowed with integer"
889 " format specifier 'c'");
890 goto done;
891 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200892
893 /* taken from unicodeobject.c formatchar() */
894 /* Integer input truncated to a character */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200895 x = PyLong_AsLong(value);
896 if (x == -1 && PyErr_Occurred())
897 goto done;
898 if (x < 0 || x > 0x10ffff) {
899 PyErr_SetString(PyExc_OverflowError,
Victor Stinnera4ac6002012-01-21 15:50:49 +0100900 "%c arg not in range(0x110000)");
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200901 goto done;
902 }
903 tmp = PyUnicode_FromOrdinal(x);
904 inumeric_chars = 0;
905 n_digits = 1;
Amaury Forgeot d'Arc6d766fc2012-01-23 23:20:43 +0100906 maxchar = Py_MAX(maxchar, (Py_UCS4)x);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200907
908 /* As a sort-of hack, we tell calc_number_widths that we only
909 have "remainder" characters. calc_number_widths thinks
910 these are characters that don't get formatted, only copied
911 into the output string. We do this for 'c' formatting,
912 because the characters are likely to be non-digits. */
913 n_remainder = 1;
914 }
915 else {
916 int base;
917 int leading_chars_to_skip = 0; /* Number of characters added by
918 PyNumber_ToBase that we want to
919 skip over. */
920
921 /* Compute the base and how many characters will be added by
922 PyNumber_ToBase */
923 switch (format->type) {
924 case 'b':
925 base = 2;
926 leading_chars_to_skip = 2; /* 0b */
927 break;
928 case 'o':
929 base = 8;
930 leading_chars_to_skip = 2; /* 0o */
931 break;
932 case 'x':
933 case 'X':
934 base = 16;
935 leading_chars_to_skip = 2; /* 0x */
936 break;
937 default: /* shouldn't be needed, but stops a compiler warning */
938 case 'd':
939 case 'n':
940 base = 10;
941 break;
942 }
943
Victor Stinnerd3f08822012-05-29 12:57:52 +0200944 if (format->sign != '+' && format->sign != ' '
945 && format->width == -1
946 && format->type != 'X' && format->type != 'n'
947 && !format->thousands_separators
948 && PyLong_CheckExact(value))
949 {
950 /* Fast path */
951 return _PyLong_FormatWriter(writer, value, base, format->alternate);
952 }
953
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200954 /* The number of prefix chars is the same as the leading
955 chars to skip */
956 if (format->alternate)
957 n_prefix = leading_chars_to_skip;
958
959 /* Do the hard part, converting to a string in a given base */
Victor Stinnerd3f08822012-05-29 12:57:52 +0200960 tmp = _PyLong_Format(value, base);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200961 if (tmp == NULL || PyUnicode_READY(tmp) == -1)
962 goto done;
963
964 inumeric_chars = 0;
965 n_digits = PyUnicode_GET_LENGTH(tmp);
966
967 prefix = inumeric_chars;
968
969 /* Is a sign character present in the output? If so, remember it
970 and skip it */
971 if (PyUnicode_READ_CHAR(tmp, inumeric_chars) == '-') {
972 sign_char = '-';
973 ++prefix;
974 ++leading_chars_to_skip;
975 }
976
977 /* Skip over the leading chars (0x, 0b, etc.) */
978 n_digits -= leading_chars_to_skip;
979 inumeric_chars += leading_chars_to_skip;
980 }
981
982 /* Determine the grouping, separator, and decimal point, if any. */
Victor Stinner41a863c2012-02-24 00:37:51 +0100983 if (get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
Eric V. Smith89e1b1a2016-09-09 23:06:47 -0400984 format->thousands_separators,
Victor Stinner41a863c2012-02-24 00:37:51 +0100985 &locale) == -1)
986 goto done;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200987
988 /* Calculate how much memory we'll need. */
989 n_total = calc_number_widths(&spec, n_prefix, sign_char, tmp, inumeric_chars,
Victor Stinner41a863c2012-02-24 00:37:51 +0100990 inumeric_chars + n_digits, n_remainder, 0,
991 &locale, format, &maxchar);
Victor Stinnera4ac6002012-01-21 15:50:49 +0100992
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200993 /* Allocate the memory. */
Victor Stinnerd3f08822012-05-29 12:57:52 +0200994 if (_PyUnicodeWriter_Prepare(writer, n_total, maxchar) == -1)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200995 goto done;
996
997 /* Populate the memory. */
Victor Stinnerd3f08822012-05-29 12:57:52 +0200998 result = fill_number(writer, &spec,
999 tmp, inumeric_chars, inumeric_chars + n_digits,
Eric V. Smith2ea97122014-04-14 11:55:10 -04001000 tmp, prefix, format->fill_char,
Victor Stinnerd3f08822012-05-29 12:57:52 +02001001 &locale, format->type == 'X');
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001002
1003done:
1004 Py_XDECREF(tmp);
Victor Stinner41a863c2012-02-24 00:37:51 +01001005 free_locale_info(&locale);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001006 return result;
1007}
1008
1009/************************************************************************/
1010/*********** float formatting *******************************************/
1011/************************************************************************/
1012
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001013/* much of this is taken from unicodeobject.c */
Victor Stinnerd3f08822012-05-29 12:57:52 +02001014static int
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001015format_float_internal(PyObject *value,
Victor Stinnerd3f08822012-05-29 12:57:52 +02001016 const InternalFormatSpec *format,
1017 _PyUnicodeWriter *writer)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001018{
1019 char *buf = NULL; /* buffer returned from PyOS_double_to_string */
1020 Py_ssize_t n_digits;
1021 Py_ssize_t n_remainder;
1022 Py_ssize_t n_total;
1023 int has_decimal;
1024 double val;
Victor Stinner76d38502013-06-24 23:34:15 +02001025 int precision, default_precision = 6;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001026 Py_UCS4 type = format->type;
1027 int add_pct = 0;
1028 Py_ssize_t index;
1029 NumberFieldWidths spec;
1030 int flags = 0;
Victor Stinnerd3f08822012-05-29 12:57:52 +02001031 int result = -1;
Amaury Forgeot d'Arccd27df32012-01-23 22:42:19 +01001032 Py_UCS4 maxchar = 127;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001033 Py_UCS4 sign_char = '\0';
1034 int float_type; /* Used to see if we have a nan, inf, or regular float. */
1035 PyObject *unicode_tmp = NULL;
1036
1037 /* Locale settings, either from the actual locale or
1038 from a hard-code pseudo-locale */
Victor Stinner41a863c2012-02-24 00:37:51 +01001039 LocaleInfo locale = STATIC_LOCALE_INFO_INIT;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001040
Victor Stinner2f084ec2013-06-23 14:54:30 +02001041 if (format->precision > INT_MAX) {
1042 PyErr_SetString(PyExc_ValueError, "precision too big");
1043 goto done;
1044 }
1045 precision = (int)format->precision;
1046
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001047 if (format->alternate)
1048 flags |= Py_DTSF_ALT;
1049
1050 if (type == '\0') {
1051 /* Omitted type specifier. Behaves in the same way as repr(x)
1052 and str(x) if no precision is given, else like 'g', but with
1053 at least one digit after the decimal point. */
1054 flags |= Py_DTSF_ADD_DOT_0;
1055 type = 'r';
1056 default_precision = 0;
1057 }
1058
1059 if (type == 'n')
1060 /* 'n' is the same as 'g', except for the locale used to
1061 format the result. We take care of that later. */
1062 type = 'g';
1063
1064 val = PyFloat_AsDouble(value);
1065 if (val == -1.0 && PyErr_Occurred())
1066 goto done;
1067
1068 if (type == '%') {
1069 type = 'f';
1070 val *= 100;
1071 add_pct = 1;
1072 }
1073
1074 if (precision < 0)
1075 precision = default_precision;
1076 else if (type == 'r')
1077 type = 'g';
1078
Martin Panter4c359642016-05-08 13:53:41 +00001079 /* Cast "type", because if we're in unicode we need to pass an
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001080 8-bit char. This is safe, because we've restricted what "type"
1081 can be. */
1082 buf = PyOS_double_to_string(val, (char)type, precision, flags,
1083 &float_type);
1084 if (buf == NULL)
1085 goto done;
1086 n_digits = strlen(buf);
1087
1088 if (add_pct) {
1089 /* We know that buf has a trailing zero (since we just called
1090 strlen() on it), and we don't use that fact any more. So we
1091 can just write over the trailing zero. */
1092 buf[n_digits] = '%';
1093 n_digits += 1;
1094 }
1095
Victor Stinnerd3f08822012-05-29 12:57:52 +02001096 if (format->sign != '+' && format->sign != ' '
1097 && format->width == -1
1098 && format->type != 'n'
1099 && !format->thousands_separators)
1100 {
1101 /* Fast path */
Victor Stinner4a587072013-11-19 12:54:53 +01001102 result = _PyUnicodeWriter_WriteASCIIString(writer, buf, n_digits);
1103 PyMem_Free(buf);
Victor Stinnerd3f08822012-05-29 12:57:52 +02001104 return result;
1105 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001106
Victor Stinner4a587072013-11-19 12:54:53 +01001107 /* Since there is no unicode version of PyOS_double_to_string,
1108 just use the 8 bit version and then convert to unicode. */
1109 unicode_tmp = _PyUnicode_FromASCII(buf, n_digits);
1110 PyMem_Free(buf);
1111 if (unicode_tmp == NULL)
1112 goto done;
1113
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001114 /* Is a sign character present in the output? If so, remember it
1115 and skip it */
Victor Stinnerd3f08822012-05-29 12:57:52 +02001116 index = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001117 if (PyUnicode_READ_CHAR(unicode_tmp, index) == '-') {
1118 sign_char = '-';
1119 ++index;
1120 --n_digits;
1121 }
1122
1123 /* Determine if we have any "remainder" (after the digits, might include
1124 decimal or exponent or both (or neither)) */
1125 parse_number(unicode_tmp, index, index + n_digits, &n_remainder, &has_decimal);
1126
1127 /* Determine the grouping, separator, and decimal point, if any. */
Victor Stinner41a863c2012-02-24 00:37:51 +01001128 if (get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
Eric V. Smith89e1b1a2016-09-09 23:06:47 -04001129 format->thousands_separators,
Victor Stinner41a863c2012-02-24 00:37:51 +01001130 &locale) == -1)
1131 goto done;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001132
1133 /* Calculate how much memory we'll need. */
Victor Stinnerafbaa202011-09-28 21:50:16 +02001134 n_total = calc_number_widths(&spec, 0, sign_char, unicode_tmp, index,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001135 index + n_digits, n_remainder, has_decimal,
Victor Stinner41a863c2012-02-24 00:37:51 +01001136 &locale, format, &maxchar);
Victor Stinnera4ac6002012-01-21 15:50:49 +01001137
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001138 /* Allocate the memory. */
Victor Stinnerd3f08822012-05-29 12:57:52 +02001139 if (_PyUnicodeWriter_Prepare(writer, n_total, maxchar) == -1)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001140 goto done;
1141
1142 /* Populate the memory. */
Victor Stinnerd3f08822012-05-29 12:57:52 +02001143 result = fill_number(writer, &spec,
1144 unicode_tmp, index, index + n_digits,
Eric V. Smith2ea97122014-04-14 11:55:10 -04001145 NULL, 0, format->fill_char,
Victor Stinnerd3f08822012-05-29 12:57:52 +02001146 &locale, 0);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001147
1148done:
Stefan Krahd9c1bf72012-09-06 13:02:46 +02001149 Py_XDECREF(unicode_tmp);
Victor Stinner41a863c2012-02-24 00:37:51 +01001150 free_locale_info(&locale);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001151 return result;
1152}
1153
1154/************************************************************************/
1155/*********** complex formatting *****************************************/
1156/************************************************************************/
1157
Victor Stinnerd3f08822012-05-29 12:57:52 +02001158static int
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001159format_complex_internal(PyObject *value,
Victor Stinnerd3f08822012-05-29 12:57:52 +02001160 const InternalFormatSpec *format,
1161 _PyUnicodeWriter *writer)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001162{
1163 double re;
1164 double im;
1165 char *re_buf = NULL; /* buffer returned from PyOS_double_to_string */
1166 char *im_buf = NULL; /* buffer returned from PyOS_double_to_string */
1167
1168 InternalFormatSpec tmp_format = *format;
1169 Py_ssize_t n_re_digits;
1170 Py_ssize_t n_im_digits;
1171 Py_ssize_t n_re_remainder;
1172 Py_ssize_t n_im_remainder;
1173 Py_ssize_t n_re_total;
1174 Py_ssize_t n_im_total;
1175 int re_has_decimal;
1176 int im_has_decimal;
Victor Stinner76d38502013-06-24 23:34:15 +02001177 int precision, default_precision = 6;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001178 Py_UCS4 type = format->type;
1179 Py_ssize_t i_re;
1180 Py_ssize_t i_im;
1181 NumberFieldWidths re_spec;
1182 NumberFieldWidths im_spec;
1183 int flags = 0;
Victor Stinnerd3f08822012-05-29 12:57:52 +02001184 int result = -1;
Amaury Forgeot d'Arccd27df32012-01-23 22:42:19 +01001185 Py_UCS4 maxchar = 127;
Victor Stinnerd3f08822012-05-29 12:57:52 +02001186 enum PyUnicode_Kind rkind;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001187 void *rdata;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001188 Py_UCS4 re_sign_char = '\0';
1189 Py_UCS4 im_sign_char = '\0';
1190 int re_float_type; /* Used to see if we have a nan, inf, or regular float. */
1191 int im_float_type;
1192 int add_parens = 0;
1193 int skip_re = 0;
1194 Py_ssize_t lpad;
1195 Py_ssize_t rpad;
1196 Py_ssize_t total;
1197 PyObject *re_unicode_tmp = NULL;
1198 PyObject *im_unicode_tmp = NULL;
1199
1200 /* Locale settings, either from the actual locale or
1201 from a hard-code pseudo-locale */
Victor Stinner41a863c2012-02-24 00:37:51 +01001202 LocaleInfo locale = STATIC_LOCALE_INFO_INIT;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001203
Victor Stinner2f084ec2013-06-23 14:54:30 +02001204 if (format->precision > INT_MAX) {
1205 PyErr_SetString(PyExc_ValueError, "precision too big");
1206 goto done;
1207 }
1208 precision = (int)format->precision;
1209
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001210 /* Zero padding is not allowed. */
1211 if (format->fill_char == '0') {
1212 PyErr_SetString(PyExc_ValueError,
1213 "Zero padding is not allowed in complex format "
1214 "specifier");
1215 goto done;
1216 }
1217
1218 /* Neither is '=' alignment . */
1219 if (format->align == '=') {
1220 PyErr_SetString(PyExc_ValueError,
1221 "'=' alignment flag is not allowed in complex format "
1222 "specifier");
1223 goto done;
1224 }
1225
1226 re = PyComplex_RealAsDouble(value);
1227 if (re == -1.0 && PyErr_Occurred())
1228 goto done;
1229 im = PyComplex_ImagAsDouble(value);
1230 if (im == -1.0 && PyErr_Occurred())
1231 goto done;
1232
1233 if (format->alternate)
1234 flags |= Py_DTSF_ALT;
1235
1236 if (type == '\0') {
1237 /* Omitted type specifier. Should be like str(self). */
1238 type = 'r';
1239 default_precision = 0;
1240 if (re == 0.0 && copysign(1.0, re) == 1.0)
1241 skip_re = 1;
1242 else
1243 add_parens = 1;
1244 }
1245
1246 if (type == 'n')
1247 /* 'n' is the same as 'g', except for the locale used to
1248 format the result. We take care of that later. */
1249 type = 'g';
1250
1251 if (precision < 0)
1252 precision = default_precision;
1253 else if (type == 'r')
1254 type = 'g';
1255
Martin Panter4c359642016-05-08 13:53:41 +00001256 /* Cast "type", because if we're in unicode we need to pass an
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001257 8-bit char. This is safe, because we've restricted what "type"
1258 can be. */
1259 re_buf = PyOS_double_to_string(re, (char)type, precision, flags,
1260 &re_float_type);
1261 if (re_buf == NULL)
1262 goto done;
1263 im_buf = PyOS_double_to_string(im, (char)type, precision, flags,
1264 &im_float_type);
1265 if (im_buf == NULL)
1266 goto done;
1267
1268 n_re_digits = strlen(re_buf);
1269 n_im_digits = strlen(im_buf);
1270
1271 /* Since there is no unicode version of PyOS_double_to_string,
1272 just use the 8 bit version and then convert to unicode. */
Victor Stinnerd3f08822012-05-29 12:57:52 +02001273 re_unicode_tmp = _PyUnicode_FromASCII(re_buf, n_re_digits);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001274 if (re_unicode_tmp == NULL)
1275 goto done;
1276 i_re = 0;
1277
Victor Stinnerd3f08822012-05-29 12:57:52 +02001278 im_unicode_tmp = _PyUnicode_FromASCII(im_buf, n_im_digits);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001279 if (im_unicode_tmp == NULL)
1280 goto done;
1281 i_im = 0;
1282
1283 /* Is a sign character present in the output? If so, remember it
1284 and skip it */
1285 if (PyUnicode_READ_CHAR(re_unicode_tmp, i_re) == '-') {
1286 re_sign_char = '-';
1287 ++i_re;
1288 --n_re_digits;
1289 }
1290 if (PyUnicode_READ_CHAR(im_unicode_tmp, i_im) == '-') {
1291 im_sign_char = '-';
1292 ++i_im;
1293 --n_im_digits;
1294 }
1295
1296 /* Determine if we have any "remainder" (after the digits, might include
1297 decimal or exponent or both (or neither)) */
Victor Stinnerafbaa202011-09-28 21:50:16 +02001298 parse_number(re_unicode_tmp, i_re, i_re + n_re_digits,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001299 &n_re_remainder, &re_has_decimal);
Victor Stinnerafbaa202011-09-28 21:50:16 +02001300 parse_number(im_unicode_tmp, i_im, i_im + n_im_digits,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001301 &n_im_remainder, &im_has_decimal);
1302
1303 /* Determine the grouping, separator, and decimal point, if any. */
Victor Stinner41a863c2012-02-24 00:37:51 +01001304 if (get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
Eric V. Smith89e1b1a2016-09-09 23:06:47 -04001305 format->thousands_separators,
Victor Stinner41a863c2012-02-24 00:37:51 +01001306 &locale) == -1)
1307 goto done;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001308
1309 /* Turn off any padding. We'll do it later after we've composed
1310 the numbers without padding. */
1311 tmp_format.fill_char = '\0';
1312 tmp_format.align = '<';
1313 tmp_format.width = -1;
1314
1315 /* Calculate how much memory we'll need. */
1316 n_re_total = calc_number_widths(&re_spec, 0, re_sign_char, re_unicode_tmp,
1317 i_re, i_re + n_re_digits, n_re_remainder,
Victor Stinner41a863c2012-02-24 00:37:51 +01001318 re_has_decimal, &locale, &tmp_format,
1319 &maxchar);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001320
1321 /* Same formatting, but always include a sign, unless the real part is
1322 * going to be omitted, in which case we use whatever sign convention was
1323 * requested by the original format. */
1324 if (!skip_re)
1325 tmp_format.sign = '+';
1326 n_im_total = calc_number_widths(&im_spec, 0, im_sign_char, im_unicode_tmp,
1327 i_im, i_im + n_im_digits, n_im_remainder,
Victor Stinner41a863c2012-02-24 00:37:51 +01001328 im_has_decimal, &locale, &tmp_format,
1329 &maxchar);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001330
1331 if (skip_re)
1332 n_re_total = 0;
1333
1334 /* Add 1 for the 'j', and optionally 2 for parens. */
1335 calc_padding(n_re_total + n_im_total + 1 + add_parens * 2,
1336 format->width, format->align, &lpad, &rpad, &total);
1337
Victor Stinner41a863c2012-02-24 00:37:51 +01001338 if (lpad || rpad)
Victor Stinnera4ac6002012-01-21 15:50:49 +01001339 maxchar = Py_MAX(maxchar, format->fill_char);
1340
Victor Stinnerd3f08822012-05-29 12:57:52 +02001341 if (_PyUnicodeWriter_Prepare(writer, total, maxchar) == -1)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001342 goto done;
Victor Stinnerd3f08822012-05-29 12:57:52 +02001343 rkind = writer->kind;
1344 rdata = writer->data;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001345
1346 /* Populate the memory. First, the padding. */
Victor Stinnerd3f08822012-05-29 12:57:52 +02001347 result = fill_padding(writer,
1348 n_re_total + n_im_total + 1 + add_parens * 2,
Eric V. Smith2ea97122014-04-14 11:55:10 -04001349 format->fill_char, lpad, rpad);
Victor Stinnerd3f08822012-05-29 12:57:52 +02001350 if (result == -1)
1351 goto done;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001352
Victor Stinnerd3f08822012-05-29 12:57:52 +02001353 if (add_parens) {
1354 PyUnicode_WRITE(rkind, rdata, writer->pos, '(');
1355 writer->pos++;
1356 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001357
1358 if (!skip_re) {
Victor Stinnerd3f08822012-05-29 12:57:52 +02001359 result = fill_number(writer, &re_spec,
1360 re_unicode_tmp, i_re, i_re + n_re_digits,
1361 NULL, 0,
1362 0,
1363 &locale, 0);
1364 if (result == -1)
Victor Stinnerafbaa202011-09-28 21:50:16 +02001365 goto done;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001366 }
Victor Stinnerd3f08822012-05-29 12:57:52 +02001367 result = fill_number(writer, &im_spec,
1368 im_unicode_tmp, i_im, i_im + n_im_digits,
1369 NULL, 0,
1370 0,
1371 &locale, 0);
1372 if (result == -1)
Victor Stinnerafbaa202011-09-28 21:50:16 +02001373 goto done;
Victor Stinnerd3f08822012-05-29 12:57:52 +02001374 PyUnicode_WRITE(rkind, rdata, writer->pos, 'j');
1375 writer->pos++;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001376
Victor Stinnerd3f08822012-05-29 12:57:52 +02001377 if (add_parens) {
1378 PyUnicode_WRITE(rkind, rdata, writer->pos, ')');
1379 writer->pos++;
1380 }
1381
1382 writer->pos += rpad;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001383
1384done:
1385 PyMem_Free(re_buf);
1386 PyMem_Free(im_buf);
1387 Py_XDECREF(re_unicode_tmp);
1388 Py_XDECREF(im_unicode_tmp);
Victor Stinner41a863c2012-02-24 00:37:51 +01001389 free_locale_info(&locale);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001390 return result;
1391}
1392
1393/************************************************************************/
1394/*********** built in formatters ****************************************/
1395/************************************************************************/
doko@ubuntu.com39378f72012-06-21 12:12:20 +02001396static int
Victor Stinnerd3f08822012-05-29 12:57:52 +02001397format_obj(PyObject *obj, _PyUnicodeWriter *writer)
1398{
1399 PyObject *str;
1400 int err;
1401
1402 str = PyObject_Str(obj);
1403 if (str == NULL)
1404 return -1;
1405 err = _PyUnicodeWriter_WriteStr(writer, str);
1406 Py_DECREF(str);
1407 return err;
1408}
1409
1410int
1411_PyUnicode_FormatAdvancedWriter(_PyUnicodeWriter *writer,
1412 PyObject *obj,
1413 PyObject *format_spec,
1414 Py_ssize_t start, Py_ssize_t end)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001415{
1416 InternalFormatSpec format;
Victor Stinnerd3f08822012-05-29 12:57:52 +02001417
1418 assert(PyUnicode_Check(obj));
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001419
1420 /* check for the special case of zero length format spec, make
1421 it equivalent to str(obj) */
Victor Stinnerd3f08822012-05-29 12:57:52 +02001422 if (start == end) {
1423 if (PyUnicode_CheckExact(obj))
1424 return _PyUnicodeWriter_WriteStr(writer, obj);
1425 else
1426 return format_obj(obj, writer);
1427 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001428
1429 /* parse the format_spec */
1430 if (!parse_internal_render_format_spec(format_spec, start, end,
1431 &format, 's', '<'))
Victor Stinnerd3f08822012-05-29 12:57:52 +02001432 return -1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001433
1434 /* type conversion? */
1435 switch (format.type) {
1436 case 's':
1437 /* no type conversion needed, already a string. do the formatting */
Victor Stinnerd3f08822012-05-29 12:57:52 +02001438 return format_string_internal(obj, &format, writer);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001439 default:
1440 /* unknown */
1441 unknown_presentation_type(format.type, obj->ob_type->tp_name);
Victor Stinnerd3f08822012-05-29 12:57:52 +02001442 return -1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001443 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001444}
1445
Victor Stinnerd3f08822012-05-29 12:57:52 +02001446int
1447_PyLong_FormatAdvancedWriter(_PyUnicodeWriter *writer,
1448 PyObject *obj,
1449 PyObject *format_spec,
1450 Py_ssize_t start, Py_ssize_t end)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001451{
Victor Stinnerd3f08822012-05-29 12:57:52 +02001452 PyObject *tmp = NULL, *str = NULL;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001453 InternalFormatSpec format;
Victor Stinnerd3f08822012-05-29 12:57:52 +02001454 int result = -1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001455
1456 /* check for the special case of zero length format spec, make
1457 it equivalent to str(obj) */
1458 if (start == end) {
Victor Stinnerd3f08822012-05-29 12:57:52 +02001459 if (PyLong_CheckExact(obj))
1460 return _PyLong_FormatWriter(writer, obj, 10, 0);
1461 else
1462 return format_obj(obj, writer);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001463 }
1464
1465 /* parse the format_spec */
1466 if (!parse_internal_render_format_spec(format_spec, start, end,
1467 &format, 'd', '>'))
1468 goto done;
1469
1470 /* type conversion? */
1471 switch (format.type) {
1472 case 'b':
1473 case 'c':
1474 case 'd':
1475 case 'o':
1476 case 'x':
1477 case 'X':
1478 case 'n':
Serhiy Storchaka95949422013-08-27 19:40:23 +03001479 /* no type conversion needed, already an int. do the formatting */
Victor Stinnerd3f08822012-05-29 12:57:52 +02001480 result = format_long_internal(obj, &format, writer);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001481 break;
1482
1483 case 'e':
1484 case 'E':
1485 case 'f':
1486 case 'F':
1487 case 'g':
1488 case 'G':
1489 case '%':
1490 /* convert to float */
1491 tmp = PyNumber_Float(obj);
1492 if (tmp == NULL)
1493 goto done;
Victor Stinnerd3f08822012-05-29 12:57:52 +02001494 result = format_float_internal(tmp, &format, writer);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001495 break;
1496
1497 default:
1498 /* unknown */
1499 unknown_presentation_type(format.type, obj->ob_type->tp_name);
1500 goto done;
1501 }
1502
1503done:
1504 Py_XDECREF(tmp);
Victor Stinnerd3f08822012-05-29 12:57:52 +02001505 Py_XDECREF(str);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001506 return result;
1507}
1508
Victor Stinnerd3f08822012-05-29 12:57:52 +02001509int
1510_PyFloat_FormatAdvancedWriter(_PyUnicodeWriter *writer,
1511 PyObject *obj,
1512 PyObject *format_spec,
1513 Py_ssize_t start, Py_ssize_t end)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001514{
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001515 InternalFormatSpec format;
1516
1517 /* check for the special case of zero length format spec, make
1518 it equivalent to str(obj) */
Victor Stinnerd3f08822012-05-29 12:57:52 +02001519 if (start == end)
1520 return format_obj(obj, writer);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001521
1522 /* parse the format_spec */
1523 if (!parse_internal_render_format_spec(format_spec, start, end,
1524 &format, '\0', '>'))
Victor Stinnerd3f08822012-05-29 12:57:52 +02001525 return -1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001526
1527 /* type conversion? */
1528 switch (format.type) {
1529 case '\0': /* No format code: like 'g', but with at least one decimal. */
1530 case 'e':
1531 case 'E':
1532 case 'f':
1533 case 'F':
1534 case 'g':
1535 case 'G':
1536 case 'n':
1537 case '%':
1538 /* no conversion, already a float. do the formatting */
Victor Stinnerd3f08822012-05-29 12:57:52 +02001539 return format_float_internal(obj, &format, writer);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001540
1541 default:
1542 /* unknown */
1543 unknown_presentation_type(format.type, obj->ob_type->tp_name);
Victor Stinnerd3f08822012-05-29 12:57:52 +02001544 return -1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001545 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001546}
1547
Victor Stinnerd3f08822012-05-29 12:57:52 +02001548int
1549_PyComplex_FormatAdvancedWriter(_PyUnicodeWriter *writer,
1550 PyObject *obj,
1551 PyObject *format_spec,
1552 Py_ssize_t start, Py_ssize_t end)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001553{
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001554 InternalFormatSpec format;
1555
1556 /* check for the special case of zero length format spec, make
1557 it equivalent to str(obj) */
Victor Stinnerd3f08822012-05-29 12:57:52 +02001558 if (start == end)
1559 return format_obj(obj, writer);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001560
1561 /* parse the format_spec */
1562 if (!parse_internal_render_format_spec(format_spec, start, end,
1563 &format, '\0', '>'))
Victor Stinnerd3f08822012-05-29 12:57:52 +02001564 return -1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001565
1566 /* type conversion? */
1567 switch (format.type) {
1568 case '\0': /* No format code: like 'g', but with at least one decimal. */
1569 case 'e':
1570 case 'E':
1571 case 'f':
1572 case 'F':
1573 case 'g':
1574 case 'G':
1575 case 'n':
1576 /* no conversion, already a complex. do the formatting */
Victor Stinnerd3f08822012-05-29 12:57:52 +02001577 return format_complex_internal(obj, &format, writer);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001578
1579 default:
1580 /* unknown */
1581 unknown_presentation_type(format.type, obj->ob_type->tp_name);
Victor Stinnerd3f08822012-05-29 12:57:52 +02001582 return -1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001583 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001584}