blob: 71e673d9f83258fc3de9c026d07965114b29859e [file] [log] [blame]
/* implements the unicode (as opposed to string) version of the
   built-in formatters for string, int, float.  that is, the versions
   of int.__format__, etc., that take and return unicode objects */
4
5#include "Python.h"
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02006#include <locale.h>
7
8/* Raises an exception about an unknown presentation type for this
9 * type. */
10
11static void
12unknown_presentation_type(Py_UCS4 presentation_type,
13 const char* type_name)
14{
15 /* %c might be out-of-range, hence the two cases. */
16 if (presentation_type > 32 && presentation_type < 128)
17 PyErr_Format(PyExc_ValueError,
18 "Unknown format code '%c' "
19 "for object of type '%.200s'",
20 (char)presentation_type,
21 type_name);
22 else
23 PyErr_Format(PyExc_ValueError,
24 "Unknown format code '\\x%x' "
25 "for object of type '%.200s'",
26 (unsigned int)presentation_type,
27 type_name);
28}
29
30static void
31invalid_comma_type(Py_UCS4 presentation_type)
32{
33 if (presentation_type > 32 && presentation_type < 128)
34 PyErr_Format(PyExc_ValueError,
Dargor28773ca2017-10-15 04:41:13 +010035 "Cannot specify ',' with '%c'.",
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020036 (char)presentation_type);
37 else
38 PyErr_Format(PyExc_ValueError,
Dargor28773ca2017-10-15 04:41:13 +010039 "Cannot specify ',' with '\\x%x'.",
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020040 (unsigned int)presentation_type);
41}
42
Eric V. Smith89e1b1a2016-09-09 23:06:47 -040043static void
Benjamin Petersoneb0dfa92016-09-09 20:14:05 -070044invalid_comma_and_underscore(void)
Eric V. Smith89e1b1a2016-09-09 23:06:47 -040045{
46 PyErr_Format(PyExc_ValueError, "Cannot specify both ',' and '_'.");
47}
48
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020049/*
50 get_integer consumes 0 or more decimal digit characters from an
51 input string, updates *result with the corresponding positive
52 integer, and returns the number of digits consumed.
53
54 returns -1 on error.
55*/
56static int
Serhiy Storchaka1f932612016-08-29 15:57:26 +030057get_integer(PyObject *str, Py_ssize_t *ppos, Py_ssize_t end,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020058 Py_ssize_t *result)
59{
Serhiy Storchaka1f932612016-08-29 15:57:26 +030060 Py_ssize_t accumulator, digitval, pos = *ppos;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020061 int numdigits;
Serhiy Storchaka1f932612016-08-29 15:57:26 +030062 int kind = PyUnicode_KIND(str);
63 void *data = PyUnicode_DATA(str);
64
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020065 accumulator = numdigits = 0;
Serhiy Storchaka1f932612016-08-29 15:57:26 +030066 for (; pos < end; pos++, numdigits++) {
67 digitval = Py_UNICODE_TODECIMAL(PyUnicode_READ(kind, data, pos));
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020068 if (digitval < 0)
69 break;
70 /*
Mark Dickinson47862d42011-12-01 15:27:04 +000071 Detect possible overflow before it happens:
72
73 accumulator * 10 + digitval > PY_SSIZE_T_MAX if and only if
74 accumulator > (PY_SSIZE_T_MAX - digitval) / 10.
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020075 */
Mark Dickinson47862d42011-12-01 15:27:04 +000076 if (accumulator > (PY_SSIZE_T_MAX - digitval) / 10) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020077 PyErr_Format(PyExc_ValueError,
78 "Too many decimal digits in format string");
Serhiy Storchaka1f932612016-08-29 15:57:26 +030079 *ppos = pos;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020080 return -1;
81 }
Mark Dickinson47862d42011-12-01 15:27:04 +000082 accumulator = accumulator * 10 + digitval;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020083 }
Serhiy Storchaka1f932612016-08-29 15:57:26 +030084 *ppos = pos;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020085 *result = accumulator;
86 return numdigits;
87}
88
89/************************************************************************/
90/*********** standard format specifier parsing **************************/
91/************************************************************************/
92
93/* returns true if this character is a specifier alignment token */
94Py_LOCAL_INLINE(int)
95is_alignment_token(Py_UCS4 c)
96{
97 switch (c) {
98 case '<': case '>': case '=': case '^':
99 return 1;
100 default:
101 return 0;
102 }
103}
104
105/* returns true if this character is a sign element */
106Py_LOCAL_INLINE(int)
107is_sign_element(Py_UCS4 c)
108{
109 switch (c) {
110 case ' ': case '+': case '-':
111 return 1;
112 default:
113 return 0;
114 }
115}
Eric Smith8c663262007-08-25 02:26:07 +0000116
/* Locale type codes: where the decimal point, thousands separator and
   grouping used for number formatting come from (see get_locale_info()).

   LT_NO_LOCALE must be zero: parse_internal_render_format_spec() tests
   the thousands_separators field for truth to see whether any separator
   was requested. */
enum LocaleType {
    /* '.' decimal point, empty separator, no grouping. */
    LT_NO_LOCALE = 0,
    /* ',' separator, groups of three digits. */
    LT_DEFAULT_LOCALE,
    /* '_' separator, groups of three digits. */
    LT_UNDERSCORE_LOCALE,
    /* '_' separator, groups of four digits (bin/oct/hex). */
    LT_UNDER_FOUR_LOCALE,
    /* Values obtained from _Py_GetLocaleconvNumeric(). */
    LT_CURRENT_LOCALE
};
Eric Smith4a7d76d2008-05-30 18:10:19 +0000125
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200126typedef struct {
127 Py_UCS4 fill_char;
128 Py_UCS4 align;
129 int alternate;
130 Py_UCS4 sign;
131 Py_ssize_t width;
Benjamin Peterson995026a2016-09-13 22:46:15 -0700132 enum LocaleType thousands_separators;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200133 Py_ssize_t precision;
134 Py_UCS4 type;
135} InternalFormatSpec;
Eric Smith4a7d76d2008-05-30 18:10:19 +0000136
#if 0
/* Occasionally useful for debugging; normally left compiled out.
   Dumps every field of `format` to stdout, one per line, followed by
   a blank line. */
static void
DEBUG_PRINT_FORMAT_SPEC(InternalFormatSpec *format)
{
    printf("internal format spec: fill_char %d\n"
           "internal format spec: align %d\n"
           "internal format spec: alternate %d\n"
           "internal format spec: sign %d\n"
           "internal format spec: width %zd\n"
           "internal format spec: thousands_separators %d\n"
           "internal format spec: precision %zd\n"
           "internal format spec: type %c\n"
           "\n",
           format->fill_char,
           format->align,
           format->alternate,
           format->sign,
           format->width,
           format->thousands_separators,
           format->precision,
           format->type);
}
#endif
154
155
156/*
157 ptr points to the start of the format_spec, end points just past its end.
158 fills in format with the parsed information.
159 returns 1 on success, 0 on failure.
160 if failure, sets the exception
161*/
162static int
163parse_internal_render_format_spec(PyObject *format_spec,
164 Py_ssize_t start, Py_ssize_t end,
165 InternalFormatSpec *format,
166 char default_type,
167 char default_align)
168{
169 Py_ssize_t pos = start;
Serhiy Storchaka1f932612016-08-29 15:57:26 +0300170 int kind = PyUnicode_KIND(format_spec);
171 void *data = PyUnicode_DATA(format_spec);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200172 /* end-pos is used throughout this code to specify the length of
173 the input string */
Serhiy Storchaka1f932612016-08-29 15:57:26 +0300174#define READ_spec(index) PyUnicode_READ(kind, data, index)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200175
176 Py_ssize_t consumed;
177 int align_specified = 0;
Eric V. Smith2ea97122014-04-14 11:55:10 -0400178 int fill_char_specified = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200179
Eric V. Smith2ea97122014-04-14 11:55:10 -0400180 format->fill_char = ' ';
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200181 format->align = default_align;
182 format->alternate = 0;
183 format->sign = '\0';
184 format->width = -1;
Benjamin Peterson995026a2016-09-13 22:46:15 -0700185 format->thousands_separators = LT_NO_LOCALE;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200186 format->precision = -1;
187 format->type = default_type;
188
189 /* If the second char is an alignment token,
190 then parse the fill char */
191 if (end-pos >= 2 && is_alignment_token(READ_spec(pos+1))) {
192 format->align = READ_spec(pos+1);
193 format->fill_char = READ_spec(pos);
Eric V. Smith2ea97122014-04-14 11:55:10 -0400194 fill_char_specified = 1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200195 align_specified = 1;
196 pos += 2;
197 }
198 else if (end-pos >= 1 && is_alignment_token(READ_spec(pos))) {
199 format->align = READ_spec(pos);
200 align_specified = 1;
201 ++pos;
202 }
203
204 /* Parse the various sign options */
205 if (end-pos >= 1 && is_sign_element(READ_spec(pos))) {
206 format->sign = READ_spec(pos);
207 ++pos;
208 }
209
210 /* If the next character is #, we're in alternate mode. This only
211 applies to integers. */
212 if (end-pos >= 1 && READ_spec(pos) == '#') {
213 format->alternate = 1;
214 ++pos;
215 }
216
217 /* The special case for 0-padding (backwards compat) */
Eric V. Smith2ea97122014-04-14 11:55:10 -0400218 if (!fill_char_specified && end-pos >= 1 && READ_spec(pos) == '0') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200219 format->fill_char = '0';
220 if (!align_specified) {
221 format->align = '=';
222 }
223 ++pos;
224 }
225
226 consumed = get_integer(format_spec, &pos, end, &format->width);
227 if (consumed == -1)
228 /* Overflow error. Exception already set. */
229 return 0;
230
231 /* If consumed is 0, we didn't consume any characters for the
232 width. In that case, reset the width to -1, because
233 get_integer() will have set it to zero. -1 is how we record
234 that the width wasn't specified. */
235 if (consumed == 0)
236 format->width = -1;
237
238 /* Comma signifies add thousands separators */
239 if (end-pos && READ_spec(pos) == ',') {
Eric V. Smith89e1b1a2016-09-09 23:06:47 -0400240 format->thousands_separators = LT_DEFAULT_LOCALE;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200241 ++pos;
242 }
Eric V. Smith89e1b1a2016-09-09 23:06:47 -0400243 /* Underscore signifies add thousands separators */
244 if (end-pos && READ_spec(pos) == '_') {
Benjamin Peterson995026a2016-09-13 22:46:15 -0700245 if (format->thousands_separators != LT_NO_LOCALE) {
Eric V. Smith89e1b1a2016-09-09 23:06:47 -0400246 invalid_comma_and_underscore();
247 return 0;
248 }
249 format->thousands_separators = LT_UNDERSCORE_LOCALE;
250 ++pos;
251 }
252 if (end-pos && READ_spec(pos) == ',') {
253 invalid_comma_and_underscore();
254 return 0;
255 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200256
257 /* Parse field precision */
258 if (end-pos && READ_spec(pos) == '.') {
259 ++pos;
260
261 consumed = get_integer(format_spec, &pos, end, &format->precision);
262 if (consumed == -1)
263 /* Overflow error. Exception already set. */
264 return 0;
265
266 /* Not having a precision after a dot is an error. */
267 if (consumed == 0) {
268 PyErr_Format(PyExc_ValueError,
269 "Format specifier missing precision");
270 return 0;
271 }
272
273 }
274
275 /* Finally, parse the type field. */
276
277 if (end-pos > 1) {
Eric V. Smithd25cfe62012-01-19 20:04:28 -0500278 /* More than one char remain, invalid format specifier. */
279 PyErr_Format(PyExc_ValueError, "Invalid format specifier");
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200280 return 0;
281 }
282
283 if (end-pos == 1) {
284 format->type = READ_spec(pos);
285 ++pos;
286 }
287
288 /* Do as much validating as we can, just by looking at the format
289 specifier. Do not take into account what type of formatting
290 we're doing (int, float, string). */
291
292 if (format->thousands_separators) {
293 switch (format->type) {
294 case 'd':
295 case 'e':
296 case 'f':
297 case 'g':
298 case 'E':
299 case 'G':
300 case '%':
301 case 'F':
302 case '\0':
303 /* These are allowed. See PEP 378.*/
304 break;
Eric V. Smith89e1b1a2016-09-09 23:06:47 -0400305 case 'b':
306 case 'o':
307 case 'x':
308 case 'X':
309 /* Underscores are allowed in bin/oct/hex. See PEP 515. */
310 if (format->thousands_separators == LT_UNDERSCORE_LOCALE) {
311 /* Every four digits, not every three, in bin/oct/hex. */
312 format->thousands_separators = LT_UNDER_FOUR_LOCALE;
313 break;
314 }
Stefan Krahf432a322017-08-21 13:09:59 +0200315 /* fall through */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200316 default:
317 invalid_comma_type(format->type);
318 return 0;
319 }
320 }
321
Victor Stinnera4ac6002012-01-21 15:50:49 +0100322 assert (format->align <= 127);
323 assert (format->sign <= 127);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200324 return 1;
325}
326
327/* Calculate the padding needed. */
328static void
329calc_padding(Py_ssize_t nchars, Py_ssize_t width, Py_UCS4 align,
330 Py_ssize_t *n_lpadding, Py_ssize_t *n_rpadding,
331 Py_ssize_t *n_total)
332{
333 if (width >= 0) {
334 if (nchars > width)
335 *n_total = nchars;
336 else
337 *n_total = width;
338 }
339 else {
340 /* not specified, use all of the chars and no more */
341 *n_total = nchars;
342 }
343
344 /* Figure out how much leading space we need, based on the
345 aligning */
346 if (align == '>')
347 *n_lpadding = *n_total - nchars;
348 else if (align == '^')
349 *n_lpadding = (*n_total - nchars) / 2;
350 else if (align == '<' || align == '=')
351 *n_lpadding = 0;
352 else {
353 /* We should never have an unspecified alignment. */
Barry Warsawb2e57942017-09-14 18:13:16 -0700354 Py_UNREACHABLE();
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200355 }
356
357 *n_rpadding = *n_total - nchars - *n_lpadding;
358}
359
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200360/* Do the padding, and return a pointer to where the caller-supplied
361 content goes. */
Victor Stinner9ce59bb2013-05-17 00:04:56 +0200362static int
Victor Stinnerd3f08822012-05-29 12:57:52 +0200363fill_padding(_PyUnicodeWriter *writer,
364 Py_ssize_t nchars,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200365 Py_UCS4 fill_char, Py_ssize_t n_lpadding,
366 Py_ssize_t n_rpadding)
367{
Victor Stinnerd3f08822012-05-29 12:57:52 +0200368 Py_ssize_t pos;
369
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200370 /* Pad on left. */
Victor Stinnerd3f08822012-05-29 12:57:52 +0200371 if (n_lpadding) {
372 pos = writer->pos;
373 _PyUnicode_FastFill(writer->buffer, pos, n_lpadding, fill_char);
374 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200375
376 /* Pad on right. */
Victor Stinnerd3f08822012-05-29 12:57:52 +0200377 if (n_rpadding) {
378 pos = writer->pos + nchars + n_lpadding;
379 _PyUnicode_FastFill(writer->buffer, pos, n_rpadding, fill_char);
380 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200381
382 /* Pointer to the user content. */
Victor Stinnerd3f08822012-05-29 12:57:52 +0200383 writer->pos += n_lpadding;
384 return 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200385}
386
387/************************************************************************/
388/*********** common routines for numeric formatting *********************/
389/************************************************************************/
390
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200391/* Locale info needed for formatting integers and the part of floats
392 before and including the decimal. Note that locales only support
393 8-bit chars, not unicode. */
394typedef struct {
Victor Stinner41a863c2012-02-24 00:37:51 +0100395 PyObject *decimal_point;
396 PyObject *thousands_sep;
397 const char *grouping;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200398} LocaleInfo;
399
Victor Stinner41a863c2012-02-24 00:37:51 +0100400#define STATIC_LOCALE_INFO_INIT {0, 0, 0}
401
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200402/* describes the layout for an integer, see the comment in
403 calc_number_widths() for details */
404typedef struct {
405 Py_ssize_t n_lpadding;
406 Py_ssize_t n_prefix;
407 Py_ssize_t n_spadding;
408 Py_ssize_t n_rpadding;
409 char sign;
410 Py_ssize_t n_sign; /* number of digits needed for sign (0/1) */
411 Py_ssize_t n_grouped_digits; /* Space taken up by the digits, including
412 any grouping chars. */
413 Py_ssize_t n_decimal; /* 0 if only an integer */
414 Py_ssize_t n_remainder; /* Digits in decimal and/or exponent part,
415 excluding the decimal itself, if
416 present. */
417
418 /* These 2 are not the widths of fields, but are needed by
419 STRINGLIB_GROUPING. */
420 Py_ssize_t n_digits; /* The number of digits before a decimal
421 or exponent. */
422 Py_ssize_t n_min_width; /* The min_width we used when we computed
423 the n_grouped_digits width. */
424} NumberFieldWidths;
425
426
427/* Given a number of the form:
428 digits[remainder]
429 where ptr points to the start and end points to the end, find where
430 the integer part ends. This could be a decimal, an exponent, both,
431 or neither.
432 If a decimal point is present, set *has_decimal and increment
433 remainder beyond it.
434 Results are undefined (but shouldn't crash) for improperly
435 formatted strings.
436*/
437static void
438parse_number(PyObject *s, Py_ssize_t pos, Py_ssize_t end,
439 Py_ssize_t *n_remainder, int *has_decimal)
440{
441 Py_ssize_t remainder;
Serhiy Storchaka1f932612016-08-29 15:57:26 +0300442 int kind = PyUnicode_KIND(s);
443 void *data = PyUnicode_DATA(s);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200444
Serhiy Storchaka1f932612016-08-29 15:57:26 +0300445 while (pos<end && Py_ISDIGIT(PyUnicode_READ(kind, data, pos)))
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200446 ++pos;
447 remainder = pos;
448
449 /* Does remainder start with a decimal point? */
Serhiy Storchaka1f932612016-08-29 15:57:26 +0300450 *has_decimal = pos<end && PyUnicode_READ(kind, data, remainder) == '.';
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200451
452 /* Skip the decimal point. */
453 if (*has_decimal)
454 remainder++;
455
456 *n_remainder = end - remainder;
457}
458
459/* not all fields of format are used. for example, precision is
460 unused. should this take discrete params in order to be more clear
461 about what it does? or is passing a single format parameter easier
462 and more efficient enough to justify a little obfuscation? */
463static Py_ssize_t
464calc_number_widths(NumberFieldWidths *spec, Py_ssize_t n_prefix,
465 Py_UCS4 sign_char, PyObject *number, Py_ssize_t n_start,
466 Py_ssize_t n_end, Py_ssize_t n_remainder,
467 int has_decimal, const LocaleInfo *locale,
Victor Stinner41a863c2012-02-24 00:37:51 +0100468 const InternalFormatSpec *format, Py_UCS4 *maxchar)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200469{
470 Py_ssize_t n_non_digit_non_padding;
471 Py_ssize_t n_padding;
472
473 spec->n_digits = n_end - n_start - n_remainder - (has_decimal?1:0);
474 spec->n_lpadding = 0;
475 spec->n_prefix = n_prefix;
Victor Stinner41a863c2012-02-24 00:37:51 +0100476 spec->n_decimal = has_decimal ? PyUnicode_GET_LENGTH(locale->decimal_point) : 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200477 spec->n_remainder = n_remainder;
478 spec->n_spadding = 0;
479 spec->n_rpadding = 0;
480 spec->sign = '\0';
481 spec->n_sign = 0;
482
483 /* the output will look like:
484 | |
485 | <lpadding> <sign> <prefix> <spadding> <grouped_digits> <decimal> <remainder> <rpadding> |
486 | |
487
488 sign is computed from format->sign and the actual
489 sign of the number
490
491 prefix is given (it's for the '0x' prefix)
492
493 digits is already known
494
495 the total width is either given, or computed from the
496 actual digits
497
498 only one of lpadding, spadding, and rpadding can be non-zero,
499 and it's calculated from the width and other fields
500 */
501
502 /* compute the various parts we're going to write */
503 switch (format->sign) {
504 case '+':
505 /* always put a + or - */
506 spec->n_sign = 1;
507 spec->sign = (sign_char == '-' ? '-' : '+');
508 break;
509 case ' ':
510 spec->n_sign = 1;
511 spec->sign = (sign_char == '-' ? '-' : ' ');
512 break;
513 default:
514 /* Not specified, or the default (-) */
515 if (sign_char == '-') {
516 spec->n_sign = 1;
517 spec->sign = '-';
518 }
519 }
520
521 /* The number of chars used for non-digits and non-padding. */
522 n_non_digit_non_padding = spec->n_sign + spec->n_prefix + spec->n_decimal +
523 spec->n_remainder;
524
525 /* min_width can go negative, that's okay. format->width == -1 means
526 we don't care. */
527 if (format->fill_char == '0' && format->align == '=')
528 spec->n_min_width = format->width - n_non_digit_non_padding;
529 else
530 spec->n_min_width = 0;
531
532 if (spec->n_digits == 0)
533 /* This case only occurs when using 'c' formatting, we need
534 to special case it because the grouping code always wants
535 to have at least one character. */
536 spec->n_grouped_digits = 0;
Victor Stinner41a863c2012-02-24 00:37:51 +0100537 else {
538 Py_UCS4 grouping_maxchar;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200539 spec->n_grouped_digits = _PyUnicode_InsertThousandsGrouping(
Victor Stinner41a863c2012-02-24 00:37:51 +0100540 NULL, 0,
541 0, NULL,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200542 spec->n_digits, spec->n_min_width,
Victor Stinner41a863c2012-02-24 00:37:51 +0100543 locale->grouping, locale->thousands_sep, &grouping_maxchar);
544 *maxchar = Py_MAX(*maxchar, grouping_maxchar);
545 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200546
547 /* Given the desired width and the total of digit and non-digit
548 space we consume, see if we need any padding. format->width can
549 be negative (meaning no padding), but this code still works in
550 that case. */
551 n_padding = format->width -
552 (n_non_digit_non_padding + spec->n_grouped_digits);
553 if (n_padding > 0) {
554 /* Some padding is needed. Determine if it's left, space, or right. */
555 switch (format->align) {
556 case '<':
557 spec->n_rpadding = n_padding;
558 break;
559 case '^':
560 spec->n_lpadding = n_padding / 2;
561 spec->n_rpadding = n_padding - spec->n_lpadding;
562 break;
563 case '=':
564 spec->n_spadding = n_padding;
565 break;
566 case '>':
567 spec->n_lpadding = n_padding;
568 break;
569 default:
570 /* Shouldn't get here, but treat it as '>' */
Barry Warsawb2e57942017-09-14 18:13:16 -0700571 Py_UNREACHABLE();
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200572 }
573 }
Victor Stinner41a863c2012-02-24 00:37:51 +0100574
575 if (spec->n_lpadding || spec->n_spadding || spec->n_rpadding)
576 *maxchar = Py_MAX(*maxchar, format->fill_char);
577
Victor Stinner90f50d42012-02-24 01:44:47 +0100578 if (spec->n_decimal)
579 *maxchar = Py_MAX(*maxchar, PyUnicode_MAX_CHAR_VALUE(locale->decimal_point));
580
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200581 return spec->n_lpadding + spec->n_sign + spec->n_prefix +
582 spec->n_spadding + spec->n_grouped_digits + spec->n_decimal +
583 spec->n_remainder + spec->n_rpadding;
584}
585
586/* Fill in the digit parts of a numbers's string representation,
587 as determined in calc_number_widths().
Victor Stinnerafbaa202011-09-28 21:50:16 +0200588 Return -1 on error, or 0 on success. */
589static int
Victor Stinnerd3f08822012-05-29 12:57:52 +0200590fill_number(_PyUnicodeWriter *writer, const NumberFieldWidths *spec,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200591 PyObject *digits, Py_ssize_t d_start, Py_ssize_t d_end,
Victor Stinnerafbaa202011-09-28 21:50:16 +0200592 PyObject *prefix, Py_ssize_t p_start,
593 Py_UCS4 fill_char,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200594 LocaleInfo *locale, int toupper)
595{
596 /* Used to keep track of digits, decimal, and remainder. */
597 Py_ssize_t d_pos = d_start;
Victor Stinner22c103b2013-05-07 23:50:03 +0200598 const unsigned int kind = writer->kind;
Victor Stinnerd3f08822012-05-29 12:57:52 +0200599 const void *data = writer->data;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200600 Py_ssize_t r;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200601
602 if (spec->n_lpadding) {
Victor Stinnerd3f08822012-05-29 12:57:52 +0200603 _PyUnicode_FastFill(writer->buffer,
604 writer->pos, spec->n_lpadding, fill_char);
605 writer->pos += spec->n_lpadding;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200606 }
607 if (spec->n_sign == 1) {
Victor Stinnerd3f08822012-05-29 12:57:52 +0200608 PyUnicode_WRITE(kind, data, writer->pos, spec->sign);
609 writer->pos++;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200610 }
611 if (spec->n_prefix) {
Victor Stinnerd3f08822012-05-29 12:57:52 +0200612 _PyUnicode_FastCopyCharacters(writer->buffer, writer->pos,
613 prefix, p_start,
614 spec->n_prefix);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200615 if (toupper) {
616 Py_ssize_t t;
Benjamin Peterson21e0da22012-01-11 21:00:42 -0500617 for (t = 0; t < spec->n_prefix; t++) {
Victor Stinnerd3f08822012-05-29 12:57:52 +0200618 Py_UCS4 c = PyUnicode_READ(kind, data, writer->pos + t);
Victor Stinnered277852012-02-01 00:22:23 +0100619 c = Py_TOUPPER(c);
Victor Stinnera4ac6002012-01-21 15:50:49 +0100620 assert (c <= 127);
Victor Stinnerd3f08822012-05-29 12:57:52 +0200621 PyUnicode_WRITE(kind, data, writer->pos + t, c);
Benjamin Peterson21e0da22012-01-11 21:00:42 -0500622 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200623 }
Victor Stinnerd3f08822012-05-29 12:57:52 +0200624 writer->pos += spec->n_prefix;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200625 }
626 if (spec->n_spadding) {
Victor Stinnerd3f08822012-05-29 12:57:52 +0200627 _PyUnicode_FastFill(writer->buffer,
628 writer->pos, spec->n_spadding, fill_char);
629 writer->pos += spec->n_spadding;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200630 }
631
632 /* Only for type 'c' special case, it has no digits. */
633 if (spec->n_digits != 0) {
634 /* Fill the digits with InsertThousandsGrouping. */
Victor Stinnerdba2dee2011-09-28 21:50:42 +0200635 char *pdigits;
636 if (PyUnicode_READY(digits))
637 return -1;
638 pdigits = PyUnicode_DATA(digits);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200639 if (PyUnicode_KIND(digits) < kind) {
640 pdigits = _PyUnicode_AsKind(digits, kind);
Victor Stinnerafbaa202011-09-28 21:50:16 +0200641 if (pdigits == NULL)
642 return -1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200643 }
Victor Stinner90f50d42012-02-24 01:44:47 +0100644 r = _PyUnicode_InsertThousandsGrouping(
Victor Stinnerd3f08822012-05-29 12:57:52 +0200645 writer->buffer, writer->pos,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200646 spec->n_grouped_digits,
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200647 pdigits + kind * d_pos,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200648 spec->n_digits, spec->n_min_width,
Victor Stinner41a863c2012-02-24 00:37:51 +0100649 locale->grouping, locale->thousands_sep, NULL);
Victor Stinner90f50d42012-02-24 01:44:47 +0100650 if (r == -1)
651 return -1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200652 assert(r == spec->n_grouped_digits);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200653 if (PyUnicode_KIND(digits) < kind)
654 PyMem_Free(pdigits);
655 d_pos += spec->n_digits;
656 }
657 if (toupper) {
658 Py_ssize_t t;
Benjamin Peterson21e0da22012-01-11 21:00:42 -0500659 for (t = 0; t < spec->n_grouped_digits; t++) {
Victor Stinnerd3f08822012-05-29 12:57:52 +0200660 Py_UCS4 c = PyUnicode_READ(kind, data, writer->pos + t);
Victor Stinnered277852012-02-01 00:22:23 +0100661 c = Py_TOUPPER(c);
Benjamin Peterson21e0da22012-01-11 21:00:42 -0500662 if (c > 127) {
663 PyErr_SetString(PyExc_SystemError, "non-ascii grouped digit");
664 return -1;
665 }
Victor Stinnerd3f08822012-05-29 12:57:52 +0200666 PyUnicode_WRITE(kind, data, writer->pos + t, c);
Benjamin Peterson21e0da22012-01-11 21:00:42 -0500667 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200668 }
Victor Stinnerd3f08822012-05-29 12:57:52 +0200669 writer->pos += spec->n_grouped_digits;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200670
671 if (spec->n_decimal) {
Victor Stinnerd3f08822012-05-29 12:57:52 +0200672 _PyUnicode_FastCopyCharacters(
673 writer->buffer, writer->pos,
674 locale->decimal_point, 0, spec->n_decimal);
675 writer->pos += spec->n_decimal;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200676 d_pos += 1;
677 }
678
679 if (spec->n_remainder) {
Victor Stinnerd3f08822012-05-29 12:57:52 +0200680 _PyUnicode_FastCopyCharacters(
681 writer->buffer, writer->pos,
682 digits, d_pos, spec->n_remainder);
683 writer->pos += spec->n_remainder;
Brett Cannon8a250fa2012-06-25 16:13:44 -0400684 /* d_pos += spec->n_remainder; */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200685 }
686
687 if (spec->n_rpadding) {
Victor Stinnerd3f08822012-05-29 12:57:52 +0200688 _PyUnicode_FastFill(writer->buffer,
689 writer->pos, spec->n_rpadding,
690 fill_char);
691 writer->pos += spec->n_rpadding;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200692 }
Victor Stinnerafbaa202011-09-28 21:50:16 +0200693 return 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200694}
695
/* Grouping description installed by get_locale_info() for LT_NO_LOCALE:
   a single CHAR_MAX element. */
static const char no_grouping[] = { CHAR_MAX };
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200697
698/* Find the decimal point character(s?), thousands_separator(s?), and
699 grouping description, either for the current locale if type is
Eric V. Smith89e1b1a2016-09-09 23:06:47 -0400700 LT_CURRENT_LOCALE, a hard-coded locale if LT_DEFAULT_LOCALE or
701 LT_UNDERSCORE_LOCALE/LT_UNDER_FOUR_LOCALE, or none if LT_NO_LOCALE. */
Victor Stinner41a863c2012-02-24 00:37:51 +0100702static int
Benjamin Peterson59e5e0d2016-09-13 22:43:45 -0700703get_locale_info(enum LocaleType type, LocaleInfo *locale_info)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200704{
705 switch (type) {
706 case LT_CURRENT_LOCALE: {
Victor Stinnercb064fc2018-01-15 15:58:02 +0100707 if (_Py_GetLocaleconvNumeric(&locale_info->decimal_point,
708 &locale_info->thousands_sep,
709 &locale_info->grouping) < 0) {
Victor Stinner41a863c2012-02-24 00:37:51 +0100710 return -1;
Victor Stinnercb064fc2018-01-15 15:58:02 +0100711 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200712 break;
713 }
714 case LT_DEFAULT_LOCALE:
Eric V. Smith89e1b1a2016-09-09 23:06:47 -0400715 case LT_UNDERSCORE_LOCALE:
716 case LT_UNDER_FOUR_LOCALE:
Victor Stinner41a863c2012-02-24 00:37:51 +0100717 locale_info->decimal_point = PyUnicode_FromOrdinal('.');
Eric V. Smith89e1b1a2016-09-09 23:06:47 -0400718 locale_info->thousands_sep = PyUnicode_FromOrdinal(
719 type == LT_DEFAULT_LOCALE ? ',' : '_');
Benjamin Peterson59e5e0d2016-09-13 22:43:45 -0700720 if (!locale_info->decimal_point || !locale_info->thousands_sep)
Victor Stinner41a863c2012-02-24 00:37:51 +0100721 return -1;
Eric V. Smith89e1b1a2016-09-09 23:06:47 -0400722 if (type != LT_UNDER_FOUR_LOCALE)
723 locale_info->grouping = "\3"; /* Group every 3 characters. The
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200724 (implicit) trailing 0 means repeat
725 infinitely. */
Eric V. Smith89e1b1a2016-09-09 23:06:47 -0400726 else
727 locale_info->grouping = "\4"; /* Bin/oct/hex group every four. */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200728 break;
729 case LT_NO_LOCALE:
Victor Stinner41a863c2012-02-24 00:37:51 +0100730 locale_info->decimal_point = PyUnicode_FromOrdinal('.');
731 locale_info->thousands_sep = PyUnicode_New(0, 0);
Benjamin Peterson59e5e0d2016-09-13 22:43:45 -0700732 if (!locale_info->decimal_point || !locale_info->thousands_sep)
Victor Stinner41a863c2012-02-24 00:37:51 +0100733 return -1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200734 locale_info->grouping = no_grouping;
735 break;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200736 }
Victor Stinner41a863c2012-02-24 00:37:51 +0100737 return 0;
738}
739
740static void
741free_locale_info(LocaleInfo *locale_info)
742{
743 Py_XDECREF(locale_info->decimal_point);
744 Py_XDECREF(locale_info->thousands_sep);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200745}
746
747/************************************************************************/
748/*********** string formatting ******************************************/
749/************************************************************************/
750
Victor Stinnerd3f08822012-05-29 12:57:52 +0200751static int
752format_string_internal(PyObject *value, const InternalFormatSpec *format,
753 _PyUnicodeWriter *writer)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200754{
755 Py_ssize_t lpad;
756 Py_ssize_t rpad;
757 Py_ssize_t total;
Victor Stinnerd3f08822012-05-29 12:57:52 +0200758 Py_ssize_t len;
759 int result = -1;
Victor Stinnerece58de2012-04-23 23:36:38 +0200760 Py_UCS4 maxchar;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200761
Victor Stinnerd3f08822012-05-29 12:57:52 +0200762 assert(PyUnicode_IS_READY(value));
763 len = PyUnicode_GET_LENGTH(value);
764
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200765 /* sign is not allowed on strings */
766 if (format->sign != '\0') {
767 PyErr_SetString(PyExc_ValueError,
768 "Sign not allowed in string format specifier");
769 goto done;
770 }
771
772 /* alternate is not allowed on strings */
773 if (format->alternate) {
774 PyErr_SetString(PyExc_ValueError,
775 "Alternate form (#) not allowed in string format "
776 "specifier");
777 goto done;
778 }
779
780 /* '=' alignment not allowed on strings */
781 if (format->align == '=') {
782 PyErr_SetString(PyExc_ValueError,
783 "'=' alignment not allowed "
784 "in string format specifier");
785 goto done;
786 }
787
Victor Stinner621ef3d2012-10-02 00:33:47 +0200788 if ((format->width == -1 || format->width <= len)
789 && (format->precision == -1 || format->precision >= len)) {
Victor Stinnerd3f08822012-05-29 12:57:52 +0200790 /* Fast path */
791 return _PyUnicodeWriter_WriteStr(writer, value);
792 }
793
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200794 /* if precision is specified, output no more that format.precision
795 characters */
796 if (format->precision >= 0 && len >= format->precision) {
797 len = format->precision;
798 }
799
800 calc_padding(len, format->width, format->align, &lpad, &rpad, &total);
801
Victor Stinnereb4b5ac2013-04-03 02:02:33 +0200802 maxchar = writer->maxchar;
Victor Stinnera4ac6002012-01-21 15:50:49 +0100803 if (lpad != 0 || rpad != 0)
804 maxchar = Py_MAX(maxchar, format->fill_char);
Victor Stinnereb4b5ac2013-04-03 02:02:33 +0200805 if (PyUnicode_MAX_CHAR_VALUE(value) > maxchar) {
806 Py_UCS4 valmaxchar = _PyUnicode_FindMaxChar(value, 0, len);
807 maxchar = Py_MAX(maxchar, valmaxchar);
808 }
Victor Stinnera4ac6002012-01-21 15:50:49 +0100809
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200810 /* allocate the resulting string */
Victor Stinnerd3f08822012-05-29 12:57:52 +0200811 if (_PyUnicodeWriter_Prepare(writer, total, maxchar) == -1)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200812 goto done;
813
814 /* Write into that space. First the padding. */
Eric V. Smith2ea97122014-04-14 11:55:10 -0400815 result = fill_padding(writer, len, format->fill_char, lpad, rpad);
Victor Stinnerd3f08822012-05-29 12:57:52 +0200816 if (result == -1)
817 goto done;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200818
819 /* Then the source string. */
Victor Stinnerc9d369f2012-06-16 02:22:37 +0200820 if (len) {
821 _PyUnicode_FastCopyCharacters(writer->buffer, writer->pos,
822 value, 0, len);
823 }
Victor Stinnerd3f08822012-05-29 12:57:52 +0200824 writer->pos += (len + rpad);
825 result = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200826
827done:
828 return result;
829}
830
831
832/************************************************************************/
833/*********** long formatting ********************************************/
834/************************************************************************/
835
Victor Stinnerd3f08822012-05-29 12:57:52 +0200836static int
837format_long_internal(PyObject *value, const InternalFormatSpec *format,
838 _PyUnicodeWriter *writer)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200839{
Victor Stinnerd3f08822012-05-29 12:57:52 +0200840 int result = -1;
Amaury Forgeot d'Arccd27df32012-01-23 22:42:19 +0100841 Py_UCS4 maxchar = 127;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200842 PyObject *tmp = NULL;
843 Py_ssize_t inumeric_chars;
844 Py_UCS4 sign_char = '\0';
845 Py_ssize_t n_digits; /* count of digits need from the computed
846 string */
847 Py_ssize_t n_remainder = 0; /* Used only for 'c' formatting, which
848 produces non-digits */
849 Py_ssize_t n_prefix = 0; /* Count of prefix chars, (e.g., '0x') */
850 Py_ssize_t n_total;
Victor Stinnered277852012-02-01 00:22:23 +0100851 Py_ssize_t prefix = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200852 NumberFieldWidths spec;
853 long x;
854
855 /* Locale settings, either from the actual locale or
856 from a hard-code pseudo-locale */
Victor Stinner41a863c2012-02-24 00:37:51 +0100857 LocaleInfo locale = STATIC_LOCALE_INFO_INIT;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200858
859 /* no precision allowed on integers */
860 if (format->precision != -1) {
861 PyErr_SetString(PyExc_ValueError,
862 "Precision not allowed in integer format specifier");
863 goto done;
864 }
865
866 /* special case for character formatting */
867 if (format->type == 'c') {
868 /* error to specify a sign */
869 if (format->sign != '\0') {
870 PyErr_SetString(PyExc_ValueError,
871 "Sign not allowed with integer"
872 " format specifier 'c'");
873 goto done;
874 }
Eric V. Smitha12572f2014-04-15 22:37:55 -0400875 /* error to request alternate format */
876 if (format->alternate) {
877 PyErr_SetString(PyExc_ValueError,
878 "Alternate form (#) not allowed with integer"
879 " format specifier 'c'");
880 goto done;
881 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200882
883 /* taken from unicodeobject.c formatchar() */
884 /* Integer input truncated to a character */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200885 x = PyLong_AsLong(value);
886 if (x == -1 && PyErr_Occurred())
887 goto done;
888 if (x < 0 || x > 0x10ffff) {
889 PyErr_SetString(PyExc_OverflowError,
Victor Stinnera4ac6002012-01-21 15:50:49 +0100890 "%c arg not in range(0x110000)");
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200891 goto done;
892 }
893 tmp = PyUnicode_FromOrdinal(x);
894 inumeric_chars = 0;
895 n_digits = 1;
Amaury Forgeot d'Arc6d766fc2012-01-23 23:20:43 +0100896 maxchar = Py_MAX(maxchar, (Py_UCS4)x);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200897
898 /* As a sort-of hack, we tell calc_number_widths that we only
899 have "remainder" characters. calc_number_widths thinks
900 these are characters that don't get formatted, only copied
901 into the output string. We do this for 'c' formatting,
902 because the characters are likely to be non-digits. */
903 n_remainder = 1;
904 }
905 else {
906 int base;
907 int leading_chars_to_skip = 0; /* Number of characters added by
908 PyNumber_ToBase that we want to
909 skip over. */
910
911 /* Compute the base and how many characters will be added by
912 PyNumber_ToBase */
913 switch (format->type) {
914 case 'b':
915 base = 2;
916 leading_chars_to_skip = 2; /* 0b */
917 break;
918 case 'o':
919 base = 8;
920 leading_chars_to_skip = 2; /* 0o */
921 break;
922 case 'x':
923 case 'X':
924 base = 16;
925 leading_chars_to_skip = 2; /* 0x */
926 break;
927 default: /* shouldn't be needed, but stops a compiler warning */
928 case 'd':
929 case 'n':
930 base = 10;
931 break;
932 }
933
Victor Stinnerd3f08822012-05-29 12:57:52 +0200934 if (format->sign != '+' && format->sign != ' '
935 && format->width == -1
936 && format->type != 'X' && format->type != 'n'
937 && !format->thousands_separators
938 && PyLong_CheckExact(value))
939 {
940 /* Fast path */
941 return _PyLong_FormatWriter(writer, value, base, format->alternate);
942 }
943
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200944 /* The number of prefix chars is the same as the leading
945 chars to skip */
946 if (format->alternate)
947 n_prefix = leading_chars_to_skip;
948
949 /* Do the hard part, converting to a string in a given base */
Victor Stinnerd3f08822012-05-29 12:57:52 +0200950 tmp = _PyLong_Format(value, base);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200951 if (tmp == NULL || PyUnicode_READY(tmp) == -1)
952 goto done;
953
954 inumeric_chars = 0;
955 n_digits = PyUnicode_GET_LENGTH(tmp);
956
957 prefix = inumeric_chars;
958
959 /* Is a sign character present in the output? If so, remember it
960 and skip it */
961 if (PyUnicode_READ_CHAR(tmp, inumeric_chars) == '-') {
962 sign_char = '-';
963 ++prefix;
964 ++leading_chars_to_skip;
965 }
966
967 /* Skip over the leading chars (0x, 0b, etc.) */
968 n_digits -= leading_chars_to_skip;
969 inumeric_chars += leading_chars_to_skip;
970 }
971
972 /* Determine the grouping, separator, and decimal point, if any. */
Victor Stinner41a863c2012-02-24 00:37:51 +0100973 if (get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
Eric V. Smith89e1b1a2016-09-09 23:06:47 -0400974 format->thousands_separators,
Victor Stinner41a863c2012-02-24 00:37:51 +0100975 &locale) == -1)
976 goto done;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200977
978 /* Calculate how much memory we'll need. */
979 n_total = calc_number_widths(&spec, n_prefix, sign_char, tmp, inumeric_chars,
Victor Stinner41a863c2012-02-24 00:37:51 +0100980 inumeric_chars + n_digits, n_remainder, 0,
981 &locale, format, &maxchar);
Victor Stinnera4ac6002012-01-21 15:50:49 +0100982
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200983 /* Allocate the memory. */
Victor Stinnerd3f08822012-05-29 12:57:52 +0200984 if (_PyUnicodeWriter_Prepare(writer, n_total, maxchar) == -1)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200985 goto done;
986
987 /* Populate the memory. */
Victor Stinnerd3f08822012-05-29 12:57:52 +0200988 result = fill_number(writer, &spec,
989 tmp, inumeric_chars, inumeric_chars + n_digits,
Eric V. Smith2ea97122014-04-14 11:55:10 -0400990 tmp, prefix, format->fill_char,
Victor Stinnerd3f08822012-05-29 12:57:52 +0200991 &locale, format->type == 'X');
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200992
993done:
994 Py_XDECREF(tmp);
Victor Stinner41a863c2012-02-24 00:37:51 +0100995 free_locale_info(&locale);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200996 return result;
997}
998
999/************************************************************************/
1000/*********** float formatting *******************************************/
1001/************************************************************************/
1002
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001003/* much of this is taken from unicodeobject.c */
Victor Stinnerd3f08822012-05-29 12:57:52 +02001004static int
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001005format_float_internal(PyObject *value,
Victor Stinnerd3f08822012-05-29 12:57:52 +02001006 const InternalFormatSpec *format,
1007 _PyUnicodeWriter *writer)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001008{
1009 char *buf = NULL; /* buffer returned from PyOS_double_to_string */
1010 Py_ssize_t n_digits;
1011 Py_ssize_t n_remainder;
1012 Py_ssize_t n_total;
1013 int has_decimal;
1014 double val;
Victor Stinner76d38502013-06-24 23:34:15 +02001015 int precision, default_precision = 6;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001016 Py_UCS4 type = format->type;
1017 int add_pct = 0;
1018 Py_ssize_t index;
1019 NumberFieldWidths spec;
1020 int flags = 0;
Victor Stinnerd3f08822012-05-29 12:57:52 +02001021 int result = -1;
Amaury Forgeot d'Arccd27df32012-01-23 22:42:19 +01001022 Py_UCS4 maxchar = 127;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001023 Py_UCS4 sign_char = '\0';
1024 int float_type; /* Used to see if we have a nan, inf, or regular float. */
1025 PyObject *unicode_tmp = NULL;
1026
1027 /* Locale settings, either from the actual locale or
1028 from a hard-code pseudo-locale */
Victor Stinner41a863c2012-02-24 00:37:51 +01001029 LocaleInfo locale = STATIC_LOCALE_INFO_INIT;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001030
Victor Stinner2f084ec2013-06-23 14:54:30 +02001031 if (format->precision > INT_MAX) {
1032 PyErr_SetString(PyExc_ValueError, "precision too big");
1033 goto done;
1034 }
1035 precision = (int)format->precision;
1036
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001037 if (format->alternate)
1038 flags |= Py_DTSF_ALT;
1039
1040 if (type == '\0') {
1041 /* Omitted type specifier. Behaves in the same way as repr(x)
1042 and str(x) if no precision is given, else like 'g', but with
1043 at least one digit after the decimal point. */
1044 flags |= Py_DTSF_ADD_DOT_0;
1045 type = 'r';
1046 default_precision = 0;
1047 }
1048
1049 if (type == 'n')
1050 /* 'n' is the same as 'g', except for the locale used to
1051 format the result. We take care of that later. */
1052 type = 'g';
1053
1054 val = PyFloat_AsDouble(value);
1055 if (val == -1.0 && PyErr_Occurred())
1056 goto done;
1057
1058 if (type == '%') {
1059 type = 'f';
1060 val *= 100;
1061 add_pct = 1;
1062 }
1063
1064 if (precision < 0)
1065 precision = default_precision;
1066 else if (type == 'r')
1067 type = 'g';
1068
Martin Panter4c359642016-05-08 13:53:41 +00001069 /* Cast "type", because if we're in unicode we need to pass an
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001070 8-bit char. This is safe, because we've restricted what "type"
1071 can be. */
1072 buf = PyOS_double_to_string(val, (char)type, precision, flags,
1073 &float_type);
1074 if (buf == NULL)
1075 goto done;
1076 n_digits = strlen(buf);
1077
1078 if (add_pct) {
1079 /* We know that buf has a trailing zero (since we just called
1080 strlen() on it), and we don't use that fact any more. So we
1081 can just write over the trailing zero. */
1082 buf[n_digits] = '%';
1083 n_digits += 1;
1084 }
1085
Victor Stinnerd3f08822012-05-29 12:57:52 +02001086 if (format->sign != '+' && format->sign != ' '
1087 && format->width == -1
1088 && format->type != 'n'
1089 && !format->thousands_separators)
1090 {
1091 /* Fast path */
Victor Stinner4a587072013-11-19 12:54:53 +01001092 result = _PyUnicodeWriter_WriteASCIIString(writer, buf, n_digits);
1093 PyMem_Free(buf);
Victor Stinnerd3f08822012-05-29 12:57:52 +02001094 return result;
1095 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001096
Victor Stinner4a587072013-11-19 12:54:53 +01001097 /* Since there is no unicode version of PyOS_double_to_string,
1098 just use the 8 bit version and then convert to unicode. */
1099 unicode_tmp = _PyUnicode_FromASCII(buf, n_digits);
1100 PyMem_Free(buf);
1101 if (unicode_tmp == NULL)
1102 goto done;
1103
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001104 /* Is a sign character present in the output? If so, remember it
1105 and skip it */
Victor Stinnerd3f08822012-05-29 12:57:52 +02001106 index = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001107 if (PyUnicode_READ_CHAR(unicode_tmp, index) == '-') {
1108 sign_char = '-';
1109 ++index;
1110 --n_digits;
1111 }
1112
1113 /* Determine if we have any "remainder" (after the digits, might include
1114 decimal or exponent or both (or neither)) */
1115 parse_number(unicode_tmp, index, index + n_digits, &n_remainder, &has_decimal);
1116
1117 /* Determine the grouping, separator, and decimal point, if any. */
Victor Stinner41a863c2012-02-24 00:37:51 +01001118 if (get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
Eric V. Smith89e1b1a2016-09-09 23:06:47 -04001119 format->thousands_separators,
Victor Stinner41a863c2012-02-24 00:37:51 +01001120 &locale) == -1)
1121 goto done;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001122
1123 /* Calculate how much memory we'll need. */
Victor Stinnerafbaa202011-09-28 21:50:16 +02001124 n_total = calc_number_widths(&spec, 0, sign_char, unicode_tmp, index,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001125 index + n_digits, n_remainder, has_decimal,
Victor Stinner41a863c2012-02-24 00:37:51 +01001126 &locale, format, &maxchar);
Victor Stinnera4ac6002012-01-21 15:50:49 +01001127
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001128 /* Allocate the memory. */
Victor Stinnerd3f08822012-05-29 12:57:52 +02001129 if (_PyUnicodeWriter_Prepare(writer, n_total, maxchar) == -1)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001130 goto done;
1131
1132 /* Populate the memory. */
Victor Stinnerd3f08822012-05-29 12:57:52 +02001133 result = fill_number(writer, &spec,
1134 unicode_tmp, index, index + n_digits,
Eric V. Smith2ea97122014-04-14 11:55:10 -04001135 NULL, 0, format->fill_char,
Victor Stinnerd3f08822012-05-29 12:57:52 +02001136 &locale, 0);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001137
1138done:
Stefan Krahd9c1bf72012-09-06 13:02:46 +02001139 Py_XDECREF(unicode_tmp);
Victor Stinner41a863c2012-02-24 00:37:51 +01001140 free_locale_info(&locale);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001141 return result;
1142}
1143
1144/************************************************************************/
1145/*********** complex formatting *****************************************/
1146/************************************************************************/
1147
Victor Stinnerd3f08822012-05-29 12:57:52 +02001148static int
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001149format_complex_internal(PyObject *value,
Victor Stinnerd3f08822012-05-29 12:57:52 +02001150 const InternalFormatSpec *format,
1151 _PyUnicodeWriter *writer)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001152{
1153 double re;
1154 double im;
1155 char *re_buf = NULL; /* buffer returned from PyOS_double_to_string */
1156 char *im_buf = NULL; /* buffer returned from PyOS_double_to_string */
1157
1158 InternalFormatSpec tmp_format = *format;
1159 Py_ssize_t n_re_digits;
1160 Py_ssize_t n_im_digits;
1161 Py_ssize_t n_re_remainder;
1162 Py_ssize_t n_im_remainder;
1163 Py_ssize_t n_re_total;
1164 Py_ssize_t n_im_total;
1165 int re_has_decimal;
1166 int im_has_decimal;
Victor Stinner76d38502013-06-24 23:34:15 +02001167 int precision, default_precision = 6;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001168 Py_UCS4 type = format->type;
1169 Py_ssize_t i_re;
1170 Py_ssize_t i_im;
1171 NumberFieldWidths re_spec;
1172 NumberFieldWidths im_spec;
1173 int flags = 0;
Victor Stinnerd3f08822012-05-29 12:57:52 +02001174 int result = -1;
Amaury Forgeot d'Arccd27df32012-01-23 22:42:19 +01001175 Py_UCS4 maxchar = 127;
Victor Stinnerd3f08822012-05-29 12:57:52 +02001176 enum PyUnicode_Kind rkind;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001177 void *rdata;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001178 Py_UCS4 re_sign_char = '\0';
1179 Py_UCS4 im_sign_char = '\0';
1180 int re_float_type; /* Used to see if we have a nan, inf, or regular float. */
1181 int im_float_type;
1182 int add_parens = 0;
1183 int skip_re = 0;
1184 Py_ssize_t lpad;
1185 Py_ssize_t rpad;
1186 Py_ssize_t total;
1187 PyObject *re_unicode_tmp = NULL;
1188 PyObject *im_unicode_tmp = NULL;
1189
1190 /* Locale settings, either from the actual locale or
1191 from a hard-code pseudo-locale */
Victor Stinner41a863c2012-02-24 00:37:51 +01001192 LocaleInfo locale = STATIC_LOCALE_INFO_INIT;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001193
Victor Stinner2f084ec2013-06-23 14:54:30 +02001194 if (format->precision > INT_MAX) {
1195 PyErr_SetString(PyExc_ValueError, "precision too big");
1196 goto done;
1197 }
1198 precision = (int)format->precision;
1199
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001200 /* Zero padding is not allowed. */
1201 if (format->fill_char == '0') {
1202 PyErr_SetString(PyExc_ValueError,
1203 "Zero padding is not allowed in complex format "
1204 "specifier");
1205 goto done;
1206 }
1207
1208 /* Neither is '=' alignment . */
1209 if (format->align == '=') {
1210 PyErr_SetString(PyExc_ValueError,
1211 "'=' alignment flag is not allowed in complex format "
1212 "specifier");
1213 goto done;
1214 }
1215
1216 re = PyComplex_RealAsDouble(value);
1217 if (re == -1.0 && PyErr_Occurred())
1218 goto done;
1219 im = PyComplex_ImagAsDouble(value);
1220 if (im == -1.0 && PyErr_Occurred())
1221 goto done;
1222
1223 if (format->alternate)
1224 flags |= Py_DTSF_ALT;
1225
1226 if (type == '\0') {
1227 /* Omitted type specifier. Should be like str(self). */
1228 type = 'r';
1229 default_precision = 0;
1230 if (re == 0.0 && copysign(1.0, re) == 1.0)
1231 skip_re = 1;
1232 else
1233 add_parens = 1;
1234 }
1235
1236 if (type == 'n')
1237 /* 'n' is the same as 'g', except for the locale used to
1238 format the result. We take care of that later. */
1239 type = 'g';
1240
1241 if (precision < 0)
1242 precision = default_precision;
1243 else if (type == 'r')
1244 type = 'g';
1245
Martin Panter4c359642016-05-08 13:53:41 +00001246 /* Cast "type", because if we're in unicode we need to pass an
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001247 8-bit char. This is safe, because we've restricted what "type"
1248 can be. */
1249 re_buf = PyOS_double_to_string(re, (char)type, precision, flags,
1250 &re_float_type);
1251 if (re_buf == NULL)
1252 goto done;
1253 im_buf = PyOS_double_to_string(im, (char)type, precision, flags,
1254 &im_float_type);
1255 if (im_buf == NULL)
1256 goto done;
1257
1258 n_re_digits = strlen(re_buf);
1259 n_im_digits = strlen(im_buf);
1260
1261 /* Since there is no unicode version of PyOS_double_to_string,
1262 just use the 8 bit version and then convert to unicode. */
Victor Stinnerd3f08822012-05-29 12:57:52 +02001263 re_unicode_tmp = _PyUnicode_FromASCII(re_buf, n_re_digits);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001264 if (re_unicode_tmp == NULL)
1265 goto done;
1266 i_re = 0;
1267
Victor Stinnerd3f08822012-05-29 12:57:52 +02001268 im_unicode_tmp = _PyUnicode_FromASCII(im_buf, n_im_digits);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001269 if (im_unicode_tmp == NULL)
1270 goto done;
1271 i_im = 0;
1272
1273 /* Is a sign character present in the output? If so, remember it
1274 and skip it */
1275 if (PyUnicode_READ_CHAR(re_unicode_tmp, i_re) == '-') {
1276 re_sign_char = '-';
1277 ++i_re;
1278 --n_re_digits;
1279 }
1280 if (PyUnicode_READ_CHAR(im_unicode_tmp, i_im) == '-') {
1281 im_sign_char = '-';
1282 ++i_im;
1283 --n_im_digits;
1284 }
1285
1286 /* Determine if we have any "remainder" (after the digits, might include
1287 decimal or exponent or both (or neither)) */
Victor Stinnerafbaa202011-09-28 21:50:16 +02001288 parse_number(re_unicode_tmp, i_re, i_re + n_re_digits,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001289 &n_re_remainder, &re_has_decimal);
Victor Stinnerafbaa202011-09-28 21:50:16 +02001290 parse_number(im_unicode_tmp, i_im, i_im + n_im_digits,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001291 &n_im_remainder, &im_has_decimal);
1292
1293 /* Determine the grouping, separator, and decimal point, if any. */
Victor Stinner41a863c2012-02-24 00:37:51 +01001294 if (get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
Eric V. Smith89e1b1a2016-09-09 23:06:47 -04001295 format->thousands_separators,
Victor Stinner41a863c2012-02-24 00:37:51 +01001296 &locale) == -1)
1297 goto done;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001298
1299 /* Turn off any padding. We'll do it later after we've composed
1300 the numbers without padding. */
1301 tmp_format.fill_char = '\0';
1302 tmp_format.align = '<';
1303 tmp_format.width = -1;
1304
1305 /* Calculate how much memory we'll need. */
1306 n_re_total = calc_number_widths(&re_spec, 0, re_sign_char, re_unicode_tmp,
1307 i_re, i_re + n_re_digits, n_re_remainder,
Victor Stinner41a863c2012-02-24 00:37:51 +01001308 re_has_decimal, &locale, &tmp_format,
1309 &maxchar);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001310
1311 /* Same formatting, but always include a sign, unless the real part is
1312 * going to be omitted, in which case we use whatever sign convention was
1313 * requested by the original format. */
1314 if (!skip_re)
1315 tmp_format.sign = '+';
1316 n_im_total = calc_number_widths(&im_spec, 0, im_sign_char, im_unicode_tmp,
1317 i_im, i_im + n_im_digits, n_im_remainder,
Victor Stinner41a863c2012-02-24 00:37:51 +01001318 im_has_decimal, &locale, &tmp_format,
1319 &maxchar);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001320
1321 if (skip_re)
1322 n_re_total = 0;
1323
1324 /* Add 1 for the 'j', and optionally 2 for parens. */
1325 calc_padding(n_re_total + n_im_total + 1 + add_parens * 2,
1326 format->width, format->align, &lpad, &rpad, &total);
1327
Victor Stinner41a863c2012-02-24 00:37:51 +01001328 if (lpad || rpad)
Victor Stinnera4ac6002012-01-21 15:50:49 +01001329 maxchar = Py_MAX(maxchar, format->fill_char);
1330
Victor Stinnerd3f08822012-05-29 12:57:52 +02001331 if (_PyUnicodeWriter_Prepare(writer, total, maxchar) == -1)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001332 goto done;
Victor Stinnerd3f08822012-05-29 12:57:52 +02001333 rkind = writer->kind;
1334 rdata = writer->data;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001335
1336 /* Populate the memory. First, the padding. */
Victor Stinnerd3f08822012-05-29 12:57:52 +02001337 result = fill_padding(writer,
1338 n_re_total + n_im_total + 1 + add_parens * 2,
Eric V. Smith2ea97122014-04-14 11:55:10 -04001339 format->fill_char, lpad, rpad);
Victor Stinnerd3f08822012-05-29 12:57:52 +02001340 if (result == -1)
1341 goto done;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001342
Victor Stinnerd3f08822012-05-29 12:57:52 +02001343 if (add_parens) {
1344 PyUnicode_WRITE(rkind, rdata, writer->pos, '(');
1345 writer->pos++;
1346 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001347
1348 if (!skip_re) {
Victor Stinnerd3f08822012-05-29 12:57:52 +02001349 result = fill_number(writer, &re_spec,
1350 re_unicode_tmp, i_re, i_re + n_re_digits,
1351 NULL, 0,
1352 0,
1353 &locale, 0);
1354 if (result == -1)
Victor Stinnerafbaa202011-09-28 21:50:16 +02001355 goto done;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001356 }
Victor Stinnerd3f08822012-05-29 12:57:52 +02001357 result = fill_number(writer, &im_spec,
1358 im_unicode_tmp, i_im, i_im + n_im_digits,
1359 NULL, 0,
1360 0,
1361 &locale, 0);
1362 if (result == -1)
Victor Stinnerafbaa202011-09-28 21:50:16 +02001363 goto done;
Victor Stinnerd3f08822012-05-29 12:57:52 +02001364 PyUnicode_WRITE(rkind, rdata, writer->pos, 'j');
1365 writer->pos++;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001366
Victor Stinnerd3f08822012-05-29 12:57:52 +02001367 if (add_parens) {
1368 PyUnicode_WRITE(rkind, rdata, writer->pos, ')');
1369 writer->pos++;
1370 }
1371
1372 writer->pos += rpad;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001373
1374done:
1375 PyMem_Free(re_buf);
1376 PyMem_Free(im_buf);
1377 Py_XDECREF(re_unicode_tmp);
1378 Py_XDECREF(im_unicode_tmp);
Victor Stinner41a863c2012-02-24 00:37:51 +01001379 free_locale_info(&locale);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001380 return result;
1381}
1382
1383/************************************************************************/
1384/*********** built in formatters ****************************************/
1385/************************************************************************/
doko@ubuntu.com39378f72012-06-21 12:12:20 +02001386static int
Victor Stinnerd3f08822012-05-29 12:57:52 +02001387format_obj(PyObject *obj, _PyUnicodeWriter *writer)
1388{
1389 PyObject *str;
1390 int err;
1391
1392 str = PyObject_Str(obj);
1393 if (str == NULL)
1394 return -1;
1395 err = _PyUnicodeWriter_WriteStr(writer, str);
1396 Py_DECREF(str);
1397 return err;
1398}
1399
1400int
1401_PyUnicode_FormatAdvancedWriter(_PyUnicodeWriter *writer,
1402 PyObject *obj,
1403 PyObject *format_spec,
1404 Py_ssize_t start, Py_ssize_t end)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001405{
1406 InternalFormatSpec format;
Victor Stinnerd3f08822012-05-29 12:57:52 +02001407
1408 assert(PyUnicode_Check(obj));
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001409
1410 /* check for the special case of zero length format spec, make
1411 it equivalent to str(obj) */
Victor Stinnerd3f08822012-05-29 12:57:52 +02001412 if (start == end) {
1413 if (PyUnicode_CheckExact(obj))
1414 return _PyUnicodeWriter_WriteStr(writer, obj);
1415 else
1416 return format_obj(obj, writer);
1417 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001418
1419 /* parse the format_spec */
1420 if (!parse_internal_render_format_spec(format_spec, start, end,
1421 &format, 's', '<'))
Victor Stinnerd3f08822012-05-29 12:57:52 +02001422 return -1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001423
1424 /* type conversion? */
1425 switch (format.type) {
1426 case 's':
1427 /* no type conversion needed, already a string. do the formatting */
Victor Stinnerd3f08822012-05-29 12:57:52 +02001428 return format_string_internal(obj, &format, writer);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001429 default:
1430 /* unknown */
1431 unknown_presentation_type(format.type, obj->ob_type->tp_name);
Victor Stinnerd3f08822012-05-29 12:57:52 +02001432 return -1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001433 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001434}
1435
Victor Stinnerd3f08822012-05-29 12:57:52 +02001436int
1437_PyLong_FormatAdvancedWriter(_PyUnicodeWriter *writer,
1438 PyObject *obj,
1439 PyObject *format_spec,
1440 Py_ssize_t start, Py_ssize_t end)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001441{
Victor Stinnerd3f08822012-05-29 12:57:52 +02001442 PyObject *tmp = NULL, *str = NULL;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001443 InternalFormatSpec format;
Victor Stinnerd3f08822012-05-29 12:57:52 +02001444 int result = -1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001445
1446 /* check for the special case of zero length format spec, make
1447 it equivalent to str(obj) */
1448 if (start == end) {
Victor Stinnerd3f08822012-05-29 12:57:52 +02001449 if (PyLong_CheckExact(obj))
1450 return _PyLong_FormatWriter(writer, obj, 10, 0);
1451 else
1452 return format_obj(obj, writer);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001453 }
1454
1455 /* parse the format_spec */
1456 if (!parse_internal_render_format_spec(format_spec, start, end,
1457 &format, 'd', '>'))
1458 goto done;
1459
1460 /* type conversion? */
1461 switch (format.type) {
1462 case 'b':
1463 case 'c':
1464 case 'd':
1465 case 'o':
1466 case 'x':
1467 case 'X':
1468 case 'n':
Serhiy Storchaka95949422013-08-27 19:40:23 +03001469 /* no type conversion needed, already an int. do the formatting */
Victor Stinnerd3f08822012-05-29 12:57:52 +02001470 result = format_long_internal(obj, &format, writer);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001471 break;
1472
1473 case 'e':
1474 case 'E':
1475 case 'f':
1476 case 'F':
1477 case 'g':
1478 case 'G':
1479 case '%':
1480 /* convert to float */
1481 tmp = PyNumber_Float(obj);
1482 if (tmp == NULL)
1483 goto done;
Victor Stinnerd3f08822012-05-29 12:57:52 +02001484 result = format_float_internal(tmp, &format, writer);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001485 break;
1486
1487 default:
1488 /* unknown */
1489 unknown_presentation_type(format.type, obj->ob_type->tp_name);
1490 goto done;
1491 }
1492
1493done:
1494 Py_XDECREF(tmp);
Victor Stinnerd3f08822012-05-29 12:57:52 +02001495 Py_XDECREF(str);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001496 return result;
1497}
1498
Victor Stinnerd3f08822012-05-29 12:57:52 +02001499int
1500_PyFloat_FormatAdvancedWriter(_PyUnicodeWriter *writer,
1501 PyObject *obj,
1502 PyObject *format_spec,
1503 Py_ssize_t start, Py_ssize_t end)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001504{
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001505 InternalFormatSpec format;
1506
1507 /* check for the special case of zero length format spec, make
1508 it equivalent to str(obj) */
Victor Stinnerd3f08822012-05-29 12:57:52 +02001509 if (start == end)
1510 return format_obj(obj, writer);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001511
1512 /* parse the format_spec */
1513 if (!parse_internal_render_format_spec(format_spec, start, end,
1514 &format, '\0', '>'))
Victor Stinnerd3f08822012-05-29 12:57:52 +02001515 return -1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001516
1517 /* type conversion? */
1518 switch (format.type) {
1519 case '\0': /* No format code: like 'g', but with at least one decimal. */
1520 case 'e':
1521 case 'E':
1522 case 'f':
1523 case 'F':
1524 case 'g':
1525 case 'G':
1526 case 'n':
1527 case '%':
1528 /* no conversion, already a float. do the formatting */
Victor Stinnerd3f08822012-05-29 12:57:52 +02001529 return format_float_internal(obj, &format, writer);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001530
1531 default:
1532 /* unknown */
1533 unknown_presentation_type(format.type, obj->ob_type->tp_name);
Victor Stinnerd3f08822012-05-29 12:57:52 +02001534 return -1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001535 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001536}
1537
Victor Stinnerd3f08822012-05-29 12:57:52 +02001538int
1539_PyComplex_FormatAdvancedWriter(_PyUnicodeWriter *writer,
1540 PyObject *obj,
1541 PyObject *format_spec,
1542 Py_ssize_t start, Py_ssize_t end)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001543{
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001544 InternalFormatSpec format;
1545
1546 /* check for the special case of zero length format spec, make
1547 it equivalent to str(obj) */
Victor Stinnerd3f08822012-05-29 12:57:52 +02001548 if (start == end)
1549 return format_obj(obj, writer);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001550
1551 /* parse the format_spec */
1552 if (!parse_internal_render_format_spec(format_spec, start, end,
1553 &format, '\0', '>'))
Victor Stinnerd3f08822012-05-29 12:57:52 +02001554 return -1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001555
1556 /* type conversion? */
1557 switch (format.type) {
1558 case '\0': /* No format code: like 'g', but with at least one decimal. */
1559 case 'e':
1560 case 'E':
1561 case 'f':
1562 case 'F':
1563 case 'g':
1564 case 'G':
1565 case 'n':
1566 /* no conversion, already a complex. do the formatting */
Victor Stinnerd3f08822012-05-29 12:57:52 +02001567 return format_complex_internal(obj, &format, writer);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001568
1569 default:
1570 /* unknown */
1571 unknown_presentation_type(format.type, obj->ob_type->tp_name);
Victor Stinnerd3f08822012-05-29 12:57:52 +02001572 return -1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001573 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001574}