blob: ef81d15bc568b4314b582a652f0808f8dc959fd2 [file] [log] [blame]
/* implements the unicode (as opposed to string) version of the
   built-in formatters for string, int, float.  that is, the versions
   of int.__format__, etc., that take and return unicode objects */
4
5#include "Python.h"
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02006#include <locale.h>
7
8/* Raises an exception about an unknown presentation type for this
9 * type. */
10
11static void
12unknown_presentation_type(Py_UCS4 presentation_type,
13 const char* type_name)
14{
15 /* %c might be out-of-range, hence the two cases. */
16 if (presentation_type > 32 && presentation_type < 128)
17 PyErr_Format(PyExc_ValueError,
18 "Unknown format code '%c' "
19 "for object of type '%.200s'",
20 (char)presentation_type,
21 type_name);
22 else
23 PyErr_Format(PyExc_ValueError,
24 "Unknown format code '\\x%x' "
25 "for object of type '%.200s'",
26 (unsigned int)presentation_type,
27 type_name);
28}
29
30static void
Miss Islington (bot)cd4dd932018-10-01 22:12:02 -070031invalid_thousands_separator_type(char specifier, Py_UCS4 presentation_type)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020032{
Miss Islington (bot)cd4dd932018-10-01 22:12:02 -070033 assert(specifier == ',' || specifier == '_');
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020034 if (presentation_type > 32 && presentation_type < 128)
35 PyErr_Format(PyExc_ValueError,
Miss Islington (bot)cd4dd932018-10-01 22:12:02 -070036 "Cannot specify '%c' with '%c'.",
37 specifier, (char)presentation_type);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020038 else
39 PyErr_Format(PyExc_ValueError,
Miss Islington (bot)cd4dd932018-10-01 22:12:02 -070040 "Cannot specify '%c' with '\\x%x'.",
41 specifier, (unsigned int)presentation_type);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020042}
43
Eric V. Smith89e1b1a2016-09-09 23:06:47 -040044static void
Benjamin Petersoneb0dfa92016-09-09 20:14:05 -070045invalid_comma_and_underscore(void)
Eric V. Smith89e1b1a2016-09-09 23:06:47 -040046{
47 PyErr_Format(PyExc_ValueError, "Cannot specify both ',' and '_'.");
48}
49
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020050/*
51 get_integer consumes 0 or more decimal digit characters from an
52 input string, updates *result with the corresponding positive
53 integer, and returns the number of digits consumed.
54
55 returns -1 on error.
56*/
57static int
Serhiy Storchaka1f932612016-08-29 15:57:26 +030058get_integer(PyObject *str, Py_ssize_t *ppos, Py_ssize_t end,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020059 Py_ssize_t *result)
60{
Serhiy Storchaka1f932612016-08-29 15:57:26 +030061 Py_ssize_t accumulator, digitval, pos = *ppos;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020062 int numdigits;
Serhiy Storchaka1f932612016-08-29 15:57:26 +030063 int kind = PyUnicode_KIND(str);
64 void *data = PyUnicode_DATA(str);
65
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020066 accumulator = numdigits = 0;
Serhiy Storchaka1f932612016-08-29 15:57:26 +030067 for (; pos < end; pos++, numdigits++) {
68 digitval = Py_UNICODE_TODECIMAL(PyUnicode_READ(kind, data, pos));
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020069 if (digitval < 0)
70 break;
71 /*
Mark Dickinson47862d42011-12-01 15:27:04 +000072 Detect possible overflow before it happens:
73
74 accumulator * 10 + digitval > PY_SSIZE_T_MAX if and only if
75 accumulator > (PY_SSIZE_T_MAX - digitval) / 10.
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020076 */
Mark Dickinson47862d42011-12-01 15:27:04 +000077 if (accumulator > (PY_SSIZE_T_MAX - digitval) / 10) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020078 PyErr_Format(PyExc_ValueError,
79 "Too many decimal digits in format string");
Serhiy Storchaka1f932612016-08-29 15:57:26 +030080 *ppos = pos;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020081 return -1;
82 }
Mark Dickinson47862d42011-12-01 15:27:04 +000083 accumulator = accumulator * 10 + digitval;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020084 }
Serhiy Storchaka1f932612016-08-29 15:57:26 +030085 *ppos = pos;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020086 *result = accumulator;
87 return numdigits;
88}
89
90/************************************************************************/
91/*********** standard format specifier parsing **************************/
92/************************************************************************/
93
94/* returns true if this character is a specifier alignment token */
95Py_LOCAL_INLINE(int)
96is_alignment_token(Py_UCS4 c)
97{
98 switch (c) {
99 case '<': case '>': case '=': case '^':
100 return 1;
101 default:
102 return 0;
103 }
104}
105
106/* returns true if this character is a sign element */
107Py_LOCAL_INLINE(int)
108is_sign_element(Py_UCS4 c)
109{
110 switch (c) {
111 case ' ': case '+': case '-':
112 return 1;
113 default:
114 return 0;
115 }
116}
Eric Smith8c663262007-08-25 02:26:07 +0000117
/* Locale type codes. LT_NO_LOCALE must be zero, because
   parse_internal_render_format_spec() tests the thousands_separators
   field for truth to decide whether a separator was requested.

   LT_DEFAULT_LOCALE and LT_UNDERSCORE_LOCALE are equal to the specifier
   character that selects them; the parser passes the stored value to
   invalid_thousands_separator_type() as that character. */
enum LocaleType {
    LT_NO_LOCALE = 0,             /* no thousands separator requested */
    LT_DEFAULT_LOCALE = ',',      /* ',' separator */
    LT_UNDERSCORE_LOCALE = '_',   /* '_' separator */
    LT_UNDER_FOUR_LOCALE,         /* '_' separator with a b/o/x/X type:
                                     group every four digits */
    LT_CURRENT_LOCALE             /* values are looked up with
                                     _Py_GetLocaleconvNumeric() in
                                     get_locale_info() */
};
Eric Smith4a7d76d2008-05-30 18:10:19 +0000126
/* Parsed form of a format specifier, filled in by
   parse_internal_render_format_spec(). */
typedef struct {
    Py_UCS4 fill_char;      /* padding character; ' ' unless a fill or the
                               '0' flag was given */
    Py_UCS4 align;          /* '<', '>', '=' or '^', or the caller-supplied
                               default alignment */
    int alternate;          /* 1 if '#' was present, else 0 */
    Py_UCS4 sign;           /* ' ', '+' or '-'; '\0' if not given */
    Py_ssize_t width;       /* -1 if not given */
    enum LocaleType thousands_separators;
    Py_ssize_t precision;   /* -1 if not given */
    Py_UCS4 type;           /* presentation type, or the caller-supplied
                               default type */
} InternalFormatSpec;
Eric Smith4a7d76d2008-05-30 18:10:19 +0000137
#if 0
/* Occasionally useful for debugging.  Normally disabled by the
   surrounding #if 0. */
static void
DEBUG_PRINT_FORMAT_SPEC(InternalFormatSpec *format)
{
    /* The Py_UCS4 and enum fields are cast to int so that each argument
       has exactly the type its %d / %c conversion expects. */
    printf("internal format spec: fill_char %d\n", (int)format->fill_char);
    printf("internal format spec: align %d\n", (int)format->align);
    printf("internal format spec: alternate %d\n", format->alternate);
    printf("internal format spec: sign %d\n", (int)format->sign);
    printf("internal format spec: width %zd\n", format->width);
    printf("internal format spec: thousands_separators %d\n",
           (int)format->thousands_separators);
    printf("internal format spec: precision %zd\n", format->precision);
    printf("internal format spec: type %c\n", (int)format->type);
    printf("\n");
}
#endif
155
156
157/*
158 ptr points to the start of the format_spec, end points just past its end.
159 fills in format with the parsed information.
160 returns 1 on success, 0 on failure.
161 if failure, sets the exception
162*/
163static int
164parse_internal_render_format_spec(PyObject *format_spec,
165 Py_ssize_t start, Py_ssize_t end,
166 InternalFormatSpec *format,
167 char default_type,
168 char default_align)
169{
170 Py_ssize_t pos = start;
Serhiy Storchaka1f932612016-08-29 15:57:26 +0300171 int kind = PyUnicode_KIND(format_spec);
172 void *data = PyUnicode_DATA(format_spec);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200173 /* end-pos is used throughout this code to specify the length of
174 the input string */
Serhiy Storchaka1f932612016-08-29 15:57:26 +0300175#define READ_spec(index) PyUnicode_READ(kind, data, index)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200176
177 Py_ssize_t consumed;
178 int align_specified = 0;
Eric V. Smith2ea97122014-04-14 11:55:10 -0400179 int fill_char_specified = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200180
Eric V. Smith2ea97122014-04-14 11:55:10 -0400181 format->fill_char = ' ';
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200182 format->align = default_align;
183 format->alternate = 0;
184 format->sign = '\0';
185 format->width = -1;
Benjamin Peterson995026a2016-09-13 22:46:15 -0700186 format->thousands_separators = LT_NO_LOCALE;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200187 format->precision = -1;
188 format->type = default_type;
189
190 /* If the second char is an alignment token,
191 then parse the fill char */
192 if (end-pos >= 2 && is_alignment_token(READ_spec(pos+1))) {
193 format->align = READ_spec(pos+1);
194 format->fill_char = READ_spec(pos);
Eric V. Smith2ea97122014-04-14 11:55:10 -0400195 fill_char_specified = 1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200196 align_specified = 1;
197 pos += 2;
198 }
199 else if (end-pos >= 1 && is_alignment_token(READ_spec(pos))) {
200 format->align = READ_spec(pos);
201 align_specified = 1;
202 ++pos;
203 }
204
205 /* Parse the various sign options */
206 if (end-pos >= 1 && is_sign_element(READ_spec(pos))) {
207 format->sign = READ_spec(pos);
208 ++pos;
209 }
210
211 /* If the next character is #, we're in alternate mode. This only
212 applies to integers. */
213 if (end-pos >= 1 && READ_spec(pos) == '#') {
214 format->alternate = 1;
215 ++pos;
216 }
217
218 /* The special case for 0-padding (backwards compat) */
Eric V. Smith2ea97122014-04-14 11:55:10 -0400219 if (!fill_char_specified && end-pos >= 1 && READ_spec(pos) == '0') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200220 format->fill_char = '0';
221 if (!align_specified) {
222 format->align = '=';
223 }
224 ++pos;
225 }
226
227 consumed = get_integer(format_spec, &pos, end, &format->width);
228 if (consumed == -1)
229 /* Overflow error. Exception already set. */
230 return 0;
231
232 /* If consumed is 0, we didn't consume any characters for the
233 width. In that case, reset the width to -1, because
234 get_integer() will have set it to zero. -1 is how we record
235 that the width wasn't specified. */
236 if (consumed == 0)
237 format->width = -1;
238
239 /* Comma signifies add thousands separators */
240 if (end-pos && READ_spec(pos) == ',') {
Eric V. Smith89e1b1a2016-09-09 23:06:47 -0400241 format->thousands_separators = LT_DEFAULT_LOCALE;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200242 ++pos;
243 }
Eric V. Smith89e1b1a2016-09-09 23:06:47 -0400244 /* Underscore signifies add thousands separators */
245 if (end-pos && READ_spec(pos) == '_') {
Benjamin Peterson995026a2016-09-13 22:46:15 -0700246 if (format->thousands_separators != LT_NO_LOCALE) {
Eric V. Smith89e1b1a2016-09-09 23:06:47 -0400247 invalid_comma_and_underscore();
248 return 0;
249 }
250 format->thousands_separators = LT_UNDERSCORE_LOCALE;
251 ++pos;
252 }
253 if (end-pos && READ_spec(pos) == ',') {
254 invalid_comma_and_underscore();
255 return 0;
256 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200257
258 /* Parse field precision */
259 if (end-pos && READ_spec(pos) == '.') {
260 ++pos;
261
262 consumed = get_integer(format_spec, &pos, end, &format->precision);
263 if (consumed == -1)
264 /* Overflow error. Exception already set. */
265 return 0;
266
267 /* Not having a precision after a dot is an error. */
268 if (consumed == 0) {
269 PyErr_Format(PyExc_ValueError,
270 "Format specifier missing precision");
271 return 0;
272 }
273
274 }
275
276 /* Finally, parse the type field. */
277
278 if (end-pos > 1) {
Eric V. Smithd25cfe62012-01-19 20:04:28 -0500279 /* More than one char remain, invalid format specifier. */
280 PyErr_Format(PyExc_ValueError, "Invalid format specifier");
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200281 return 0;
282 }
283
284 if (end-pos == 1) {
285 format->type = READ_spec(pos);
286 ++pos;
287 }
288
289 /* Do as much validating as we can, just by looking at the format
290 specifier. Do not take into account what type of formatting
291 we're doing (int, float, string). */
292
293 if (format->thousands_separators) {
294 switch (format->type) {
295 case 'd':
296 case 'e':
297 case 'f':
298 case 'g':
299 case 'E':
300 case 'G':
301 case '%':
302 case 'F':
303 case '\0':
304 /* These are allowed. See PEP 378.*/
305 break;
Eric V. Smith89e1b1a2016-09-09 23:06:47 -0400306 case 'b':
307 case 'o':
308 case 'x':
309 case 'X':
310 /* Underscores are allowed in bin/oct/hex. See PEP 515. */
311 if (format->thousands_separators == LT_UNDERSCORE_LOCALE) {
312 /* Every four digits, not every three, in bin/oct/hex. */
313 format->thousands_separators = LT_UNDER_FOUR_LOCALE;
314 break;
315 }
Stefan Krahf432a322017-08-21 13:09:59 +0200316 /* fall through */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200317 default:
Miss Islington (bot)cd4dd932018-10-01 22:12:02 -0700318 invalid_thousands_separator_type(format->thousands_separators, format->type);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200319 return 0;
320 }
321 }
322
Victor Stinnera4ac6002012-01-21 15:50:49 +0100323 assert (format->align <= 127);
324 assert (format->sign <= 127);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200325 return 1;
326}
327
328/* Calculate the padding needed. */
329static void
330calc_padding(Py_ssize_t nchars, Py_ssize_t width, Py_UCS4 align,
331 Py_ssize_t *n_lpadding, Py_ssize_t *n_rpadding,
332 Py_ssize_t *n_total)
333{
334 if (width >= 0) {
335 if (nchars > width)
336 *n_total = nchars;
337 else
338 *n_total = width;
339 }
340 else {
341 /* not specified, use all of the chars and no more */
342 *n_total = nchars;
343 }
344
345 /* Figure out how much leading space we need, based on the
346 aligning */
347 if (align == '>')
348 *n_lpadding = *n_total - nchars;
349 else if (align == '^')
350 *n_lpadding = (*n_total - nchars) / 2;
351 else if (align == '<' || align == '=')
352 *n_lpadding = 0;
353 else {
354 /* We should never have an unspecified alignment. */
Barry Warsawb2e57942017-09-14 18:13:16 -0700355 Py_UNREACHABLE();
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200356 }
357
358 *n_rpadding = *n_total - nchars - *n_lpadding;
359}
360
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200361/* Do the padding, and return a pointer to where the caller-supplied
362 content goes. */
Victor Stinner9ce59bb2013-05-17 00:04:56 +0200363static int
Victor Stinnerd3f08822012-05-29 12:57:52 +0200364fill_padding(_PyUnicodeWriter *writer,
365 Py_ssize_t nchars,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200366 Py_UCS4 fill_char, Py_ssize_t n_lpadding,
367 Py_ssize_t n_rpadding)
368{
Victor Stinnerd3f08822012-05-29 12:57:52 +0200369 Py_ssize_t pos;
370
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200371 /* Pad on left. */
Victor Stinnerd3f08822012-05-29 12:57:52 +0200372 if (n_lpadding) {
373 pos = writer->pos;
374 _PyUnicode_FastFill(writer->buffer, pos, n_lpadding, fill_char);
375 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200376
377 /* Pad on right. */
Victor Stinnerd3f08822012-05-29 12:57:52 +0200378 if (n_rpadding) {
379 pos = writer->pos + nchars + n_lpadding;
380 _PyUnicode_FastFill(writer->buffer, pos, n_rpadding, fill_char);
381 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200382
383 /* Pointer to the user content. */
Victor Stinnerd3f08822012-05-29 12:57:52 +0200384 writer->pos += n_lpadding;
385 return 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200386}
387
388/************************************************************************/
389/*********** common routines for numeric formatting *********************/
390/************************************************************************/
391
/* Locale info needed for formatting integers and the part of floats
   before and including the decimal.  Note that locales only support
   8-bit chars, not unicode.

   Filled in by get_locale_info() and released by free_locale_info(). */
typedef struct {
    PyObject *decimal_point;    /* decimal point, as a str object */
    PyObject *thousands_sep;    /* thousands separator, as a str object */
    const char *grouping;       /* grouping description: a string literal,
                                   no_grouping, or grouping_buffer */
    char *grouping_buffer;      /* copy of the locale's grouping string made
                                   for LT_CURRENT_LOCALE; freed with
                                   PyMem_Free() in free_locale_info() */
} LocaleInfo;

/* Initializer that sets every LocaleInfo member to zero/NULL. */
#define STATIC_LOCALE_INFO_INIT {0, 0, 0, 0}
Victor Stinner41a863c2012-02-24 00:37:51 +0100403
/* describes the layout for an integer, see the comment in
   calc_number_widths() for details.  Filled in by calc_number_widths()
   and consumed by fill_number(). */
typedef struct {
    Py_ssize_t n_lpadding;  /* fill chars before the sign */
    Py_ssize_t n_prefix;    /* length of the prefix */
    Py_ssize_t n_spadding;  /* fill chars between prefix and digits
                               ('=' alignment) */
    Py_ssize_t n_rpadding;  /* fill chars after the remainder */
    char sign;              /* '+', '-' or ' '; '\0' if no sign is
                               written */
    Py_ssize_t n_sign;      /* number of digits needed for sign (0/1) */
    Py_ssize_t n_grouped_digits; /* Space taken up by the digits, including
                                    any grouping chars. */
    Py_ssize_t n_decimal;   /* 0 if only an integer */
    Py_ssize_t n_remainder; /* Digits in decimal and/or exponent part,
                               excluding the decimal itself, if
                               present. */

    /* These 2 are not the widths of fields, but are needed by
       STRINGLIB_GROUPING. */
    Py_ssize_t n_digits;    /* The number of digits before a decimal
                               or exponent. */
    Py_ssize_t n_min_width; /* The min_width we used when we computed
                               the n_grouped_digits width. */
} NumberFieldWidths;
427
428
429/* Given a number of the form:
430 digits[remainder]
431 where ptr points to the start and end points to the end, find where
432 the integer part ends. This could be a decimal, an exponent, both,
433 or neither.
434 If a decimal point is present, set *has_decimal and increment
435 remainder beyond it.
436 Results are undefined (but shouldn't crash) for improperly
437 formatted strings.
438*/
439static void
440parse_number(PyObject *s, Py_ssize_t pos, Py_ssize_t end,
441 Py_ssize_t *n_remainder, int *has_decimal)
442{
443 Py_ssize_t remainder;
Serhiy Storchaka1f932612016-08-29 15:57:26 +0300444 int kind = PyUnicode_KIND(s);
445 void *data = PyUnicode_DATA(s);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200446
Serhiy Storchaka1f932612016-08-29 15:57:26 +0300447 while (pos<end && Py_ISDIGIT(PyUnicode_READ(kind, data, pos)))
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200448 ++pos;
449 remainder = pos;
450
451 /* Does remainder start with a decimal point? */
Serhiy Storchaka1f932612016-08-29 15:57:26 +0300452 *has_decimal = pos<end && PyUnicode_READ(kind, data, remainder) == '.';
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200453
454 /* Skip the decimal point. */
455 if (*has_decimal)
456 remainder++;
457
458 *n_remainder = end - remainder;
459}
460
461/* not all fields of format are used. for example, precision is
462 unused. should this take discrete params in order to be more clear
463 about what it does? or is passing a single format parameter easier
Victor Stinner6f5fa1b2018-11-26 14:17:01 +0100464 and more efficient enough to justify a little obfuscation?
465 Return -1 on error. */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200466static Py_ssize_t
467calc_number_widths(NumberFieldWidths *spec, Py_ssize_t n_prefix,
468 Py_UCS4 sign_char, PyObject *number, Py_ssize_t n_start,
469 Py_ssize_t n_end, Py_ssize_t n_remainder,
470 int has_decimal, const LocaleInfo *locale,
Victor Stinner41a863c2012-02-24 00:37:51 +0100471 const InternalFormatSpec *format, Py_UCS4 *maxchar)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200472{
473 Py_ssize_t n_non_digit_non_padding;
474 Py_ssize_t n_padding;
475
476 spec->n_digits = n_end - n_start - n_remainder - (has_decimal?1:0);
477 spec->n_lpadding = 0;
478 spec->n_prefix = n_prefix;
Victor Stinner41a863c2012-02-24 00:37:51 +0100479 spec->n_decimal = has_decimal ? PyUnicode_GET_LENGTH(locale->decimal_point) : 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200480 spec->n_remainder = n_remainder;
481 spec->n_spadding = 0;
482 spec->n_rpadding = 0;
483 spec->sign = '\0';
484 spec->n_sign = 0;
485
486 /* the output will look like:
487 | |
488 | <lpadding> <sign> <prefix> <spadding> <grouped_digits> <decimal> <remainder> <rpadding> |
489 | |
490
491 sign is computed from format->sign and the actual
492 sign of the number
493
494 prefix is given (it's for the '0x' prefix)
495
496 digits is already known
497
498 the total width is either given, or computed from the
499 actual digits
500
501 only one of lpadding, spadding, and rpadding can be non-zero,
502 and it's calculated from the width and other fields
503 */
504
505 /* compute the various parts we're going to write */
506 switch (format->sign) {
507 case '+':
508 /* always put a + or - */
509 spec->n_sign = 1;
510 spec->sign = (sign_char == '-' ? '-' : '+');
511 break;
512 case ' ':
513 spec->n_sign = 1;
514 spec->sign = (sign_char == '-' ? '-' : ' ');
515 break;
516 default:
517 /* Not specified, or the default (-) */
518 if (sign_char == '-') {
519 spec->n_sign = 1;
520 spec->sign = '-';
521 }
522 }
523
524 /* The number of chars used for non-digits and non-padding. */
525 n_non_digit_non_padding = spec->n_sign + spec->n_prefix + spec->n_decimal +
526 spec->n_remainder;
527
528 /* min_width can go negative, that's okay. format->width == -1 means
529 we don't care. */
530 if (format->fill_char == '0' && format->align == '=')
531 spec->n_min_width = format->width - n_non_digit_non_padding;
532 else
533 spec->n_min_width = 0;
534
535 if (spec->n_digits == 0)
536 /* This case only occurs when using 'c' formatting, we need
537 to special case it because the grouping code always wants
538 to have at least one character. */
539 spec->n_grouped_digits = 0;
Victor Stinner41a863c2012-02-24 00:37:51 +0100540 else {
541 Py_UCS4 grouping_maxchar;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200542 spec->n_grouped_digits = _PyUnicode_InsertThousandsGrouping(
Victor Stinner41a863c2012-02-24 00:37:51 +0100543 NULL, 0,
Victor Stinner6f5fa1b2018-11-26 14:17:01 +0100544 NULL, 0, spec->n_digits,
545 spec->n_min_width,
Victor Stinner41a863c2012-02-24 00:37:51 +0100546 locale->grouping, locale->thousands_sep, &grouping_maxchar);
Victor Stinner6f5fa1b2018-11-26 14:17:01 +0100547 if (spec->n_grouped_digits == -1) {
548 return -1;
549 }
Victor Stinner41a863c2012-02-24 00:37:51 +0100550 *maxchar = Py_MAX(*maxchar, grouping_maxchar);
551 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200552
553 /* Given the desired width and the total of digit and non-digit
554 space we consume, see if we need any padding. format->width can
555 be negative (meaning no padding), but this code still works in
556 that case. */
557 n_padding = format->width -
558 (n_non_digit_non_padding + spec->n_grouped_digits);
559 if (n_padding > 0) {
560 /* Some padding is needed. Determine if it's left, space, or right. */
561 switch (format->align) {
562 case '<':
563 spec->n_rpadding = n_padding;
564 break;
565 case '^':
566 spec->n_lpadding = n_padding / 2;
567 spec->n_rpadding = n_padding - spec->n_lpadding;
568 break;
569 case '=':
570 spec->n_spadding = n_padding;
571 break;
572 case '>':
573 spec->n_lpadding = n_padding;
574 break;
575 default:
576 /* Shouldn't get here, but treat it as '>' */
Barry Warsawb2e57942017-09-14 18:13:16 -0700577 Py_UNREACHABLE();
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200578 }
579 }
Victor Stinner41a863c2012-02-24 00:37:51 +0100580
581 if (spec->n_lpadding || spec->n_spadding || spec->n_rpadding)
582 *maxchar = Py_MAX(*maxchar, format->fill_char);
583
Victor Stinner90f50d42012-02-24 01:44:47 +0100584 if (spec->n_decimal)
585 *maxchar = Py_MAX(*maxchar, PyUnicode_MAX_CHAR_VALUE(locale->decimal_point));
586
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200587 return spec->n_lpadding + spec->n_sign + spec->n_prefix +
588 spec->n_spadding + spec->n_grouped_digits + spec->n_decimal +
589 spec->n_remainder + spec->n_rpadding;
590}
591
592/* Fill in the digit parts of a numbers's string representation,
593 as determined in calc_number_widths().
Victor Stinnerafbaa202011-09-28 21:50:16 +0200594 Return -1 on error, or 0 on success. */
595static int
Victor Stinnerd3f08822012-05-29 12:57:52 +0200596fill_number(_PyUnicodeWriter *writer, const NumberFieldWidths *spec,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200597 PyObject *digits, Py_ssize_t d_start, Py_ssize_t d_end,
Victor Stinnerafbaa202011-09-28 21:50:16 +0200598 PyObject *prefix, Py_ssize_t p_start,
599 Py_UCS4 fill_char,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200600 LocaleInfo *locale, int toupper)
601{
602 /* Used to keep track of digits, decimal, and remainder. */
603 Py_ssize_t d_pos = d_start;
Victor Stinner22c103b2013-05-07 23:50:03 +0200604 const unsigned int kind = writer->kind;
Victor Stinnerd3f08822012-05-29 12:57:52 +0200605 const void *data = writer->data;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200606 Py_ssize_t r;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200607
608 if (spec->n_lpadding) {
Victor Stinnerd3f08822012-05-29 12:57:52 +0200609 _PyUnicode_FastFill(writer->buffer,
610 writer->pos, spec->n_lpadding, fill_char);
611 writer->pos += spec->n_lpadding;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200612 }
613 if (spec->n_sign == 1) {
Victor Stinnerd3f08822012-05-29 12:57:52 +0200614 PyUnicode_WRITE(kind, data, writer->pos, spec->sign);
615 writer->pos++;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200616 }
617 if (spec->n_prefix) {
Victor Stinnerd3f08822012-05-29 12:57:52 +0200618 _PyUnicode_FastCopyCharacters(writer->buffer, writer->pos,
619 prefix, p_start,
620 spec->n_prefix);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200621 if (toupper) {
622 Py_ssize_t t;
Benjamin Peterson21e0da22012-01-11 21:00:42 -0500623 for (t = 0; t < spec->n_prefix; t++) {
Victor Stinnerd3f08822012-05-29 12:57:52 +0200624 Py_UCS4 c = PyUnicode_READ(kind, data, writer->pos + t);
Victor Stinnered277852012-02-01 00:22:23 +0100625 c = Py_TOUPPER(c);
Victor Stinnera4ac6002012-01-21 15:50:49 +0100626 assert (c <= 127);
Victor Stinnerd3f08822012-05-29 12:57:52 +0200627 PyUnicode_WRITE(kind, data, writer->pos + t, c);
Benjamin Peterson21e0da22012-01-11 21:00:42 -0500628 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200629 }
Victor Stinnerd3f08822012-05-29 12:57:52 +0200630 writer->pos += spec->n_prefix;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200631 }
632 if (spec->n_spadding) {
Victor Stinnerd3f08822012-05-29 12:57:52 +0200633 _PyUnicode_FastFill(writer->buffer,
634 writer->pos, spec->n_spadding, fill_char);
635 writer->pos += spec->n_spadding;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200636 }
637
638 /* Only for type 'c' special case, it has no digits. */
639 if (spec->n_digits != 0) {
640 /* Fill the digits with InsertThousandsGrouping. */
Victor Stinner90f50d42012-02-24 01:44:47 +0100641 r = _PyUnicode_InsertThousandsGrouping(
Victor Stinner6f5fa1b2018-11-26 14:17:01 +0100642 writer, spec->n_grouped_digits,
643 digits, d_pos, spec->n_digits,
644 spec->n_min_width,
Victor Stinner41a863c2012-02-24 00:37:51 +0100645 locale->grouping, locale->thousands_sep, NULL);
Victor Stinner90f50d42012-02-24 01:44:47 +0100646 if (r == -1)
647 return -1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200648 assert(r == spec->n_grouped_digits);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200649 d_pos += spec->n_digits;
650 }
651 if (toupper) {
652 Py_ssize_t t;
Benjamin Peterson21e0da22012-01-11 21:00:42 -0500653 for (t = 0; t < spec->n_grouped_digits; t++) {
Victor Stinnerd3f08822012-05-29 12:57:52 +0200654 Py_UCS4 c = PyUnicode_READ(kind, data, writer->pos + t);
Victor Stinnered277852012-02-01 00:22:23 +0100655 c = Py_TOUPPER(c);
Benjamin Peterson21e0da22012-01-11 21:00:42 -0500656 if (c > 127) {
657 PyErr_SetString(PyExc_SystemError, "non-ascii grouped digit");
658 return -1;
659 }
Victor Stinnerd3f08822012-05-29 12:57:52 +0200660 PyUnicode_WRITE(kind, data, writer->pos + t, c);
Benjamin Peterson21e0da22012-01-11 21:00:42 -0500661 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200662 }
Victor Stinnerd3f08822012-05-29 12:57:52 +0200663 writer->pos += spec->n_grouped_digits;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200664
665 if (spec->n_decimal) {
Victor Stinnerd3f08822012-05-29 12:57:52 +0200666 _PyUnicode_FastCopyCharacters(
667 writer->buffer, writer->pos,
668 locale->decimal_point, 0, spec->n_decimal);
669 writer->pos += spec->n_decimal;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200670 d_pos += 1;
671 }
672
673 if (spec->n_remainder) {
Victor Stinnerd3f08822012-05-29 12:57:52 +0200674 _PyUnicode_FastCopyCharacters(
675 writer->buffer, writer->pos,
676 digits, d_pos, spec->n_remainder);
677 writer->pos += spec->n_remainder;
Brett Cannon8a250fa2012-06-25 16:13:44 -0400678 /* d_pos += spec->n_remainder; */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200679 }
680
681 if (spec->n_rpadding) {
Victor Stinnerd3f08822012-05-29 12:57:52 +0200682 _PyUnicode_FastFill(writer->buffer,
683 writer->pos, spec->n_rpadding,
684 fill_char);
685 writer->pos += spec->n_rpadding;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200686 }
Victor Stinnerafbaa202011-09-28 21:50:16 +0200687 return 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200688}
689
Serhiy Storchaka2d06e842015-12-25 19:53:18 +0200690static const char no_grouping[1] = {CHAR_MAX};
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200691
692/* Find the decimal point character(s?), thousands_separator(s?), and
693 grouping description, either for the current locale if type is
Eric V. Smith89e1b1a2016-09-09 23:06:47 -0400694 LT_CURRENT_LOCALE, a hard-coded locale if LT_DEFAULT_LOCALE or
695 LT_UNDERSCORE_LOCALE/LT_UNDER_FOUR_LOCALE, or none if LT_NO_LOCALE. */
Victor Stinner41a863c2012-02-24 00:37:51 +0100696static int
Benjamin Peterson59e5e0d2016-09-13 22:43:45 -0700697get_locale_info(enum LocaleType type, LocaleInfo *locale_info)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200698{
699 switch (type) {
700 case LT_CURRENT_LOCALE: {
Victor Stinner6eff6b82018-11-20 22:06:21 +0100701 const char *grouping;
Victor Stinnercb064fc2018-01-15 15:58:02 +0100702 if (_Py_GetLocaleconvNumeric(&locale_info->decimal_point,
703 &locale_info->thousands_sep,
Victor Stinner6eff6b82018-11-20 22:06:21 +0100704 &grouping) < 0) {
Victor Stinner41a863c2012-02-24 00:37:51 +0100705 return -1;
Victor Stinnercb064fc2018-01-15 15:58:02 +0100706 }
Victor Stinner6eff6b82018-11-20 22:06:21 +0100707
708 /* localeconv() grouping can become a dangling pointer or point
709 to a different string if another thread calls localeconv() during
710 the string formatting. Copy the string to avoid this risk. */
711 locale_info->grouping_buffer = _PyMem_Strdup(grouping);
712 if (locale_info->grouping_buffer == NULL) {
713 PyErr_NoMemory();
714 return -1;
715 }
716 locale_info->grouping = locale_info->grouping_buffer;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200717 break;
718 }
719 case LT_DEFAULT_LOCALE:
Eric V. Smith89e1b1a2016-09-09 23:06:47 -0400720 case LT_UNDERSCORE_LOCALE:
721 case LT_UNDER_FOUR_LOCALE:
Victor Stinner41a863c2012-02-24 00:37:51 +0100722 locale_info->decimal_point = PyUnicode_FromOrdinal('.');
Eric V. Smith89e1b1a2016-09-09 23:06:47 -0400723 locale_info->thousands_sep = PyUnicode_FromOrdinal(
724 type == LT_DEFAULT_LOCALE ? ',' : '_');
Benjamin Peterson59e5e0d2016-09-13 22:43:45 -0700725 if (!locale_info->decimal_point || !locale_info->thousands_sep)
Victor Stinner41a863c2012-02-24 00:37:51 +0100726 return -1;
Eric V. Smith89e1b1a2016-09-09 23:06:47 -0400727 if (type != LT_UNDER_FOUR_LOCALE)
728 locale_info->grouping = "\3"; /* Group every 3 characters. The
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200729 (implicit) trailing 0 means repeat
730 infinitely. */
Eric V. Smith89e1b1a2016-09-09 23:06:47 -0400731 else
732 locale_info->grouping = "\4"; /* Bin/oct/hex group every four. */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200733 break;
734 case LT_NO_LOCALE:
Victor Stinner41a863c2012-02-24 00:37:51 +0100735 locale_info->decimal_point = PyUnicode_FromOrdinal('.');
736 locale_info->thousands_sep = PyUnicode_New(0, 0);
Benjamin Peterson59e5e0d2016-09-13 22:43:45 -0700737 if (!locale_info->decimal_point || !locale_info->thousands_sep)
Victor Stinner41a863c2012-02-24 00:37:51 +0100738 return -1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200739 locale_info->grouping = no_grouping;
740 break;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200741 }
Victor Stinner41a863c2012-02-24 00:37:51 +0100742 return 0;
743}
744
745static void
746free_locale_info(LocaleInfo *locale_info)
747{
748 Py_XDECREF(locale_info->decimal_point);
749 Py_XDECREF(locale_info->thousands_sep);
Victor Stinner6eff6b82018-11-20 22:06:21 +0100750 PyMem_Free(locale_info->grouping_buffer);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200751}
752
753/************************************************************************/
754/*********** string formatting ******************************************/
755/************************************************************************/
756
Victor Stinnerd3f08822012-05-29 12:57:52 +0200757static int
758format_string_internal(PyObject *value, const InternalFormatSpec *format,
759 _PyUnicodeWriter *writer)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200760{
761 Py_ssize_t lpad;
762 Py_ssize_t rpad;
763 Py_ssize_t total;
Victor Stinnerd3f08822012-05-29 12:57:52 +0200764 Py_ssize_t len;
765 int result = -1;
Victor Stinnerece58de2012-04-23 23:36:38 +0200766 Py_UCS4 maxchar;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200767
Victor Stinnerd3f08822012-05-29 12:57:52 +0200768 assert(PyUnicode_IS_READY(value));
769 len = PyUnicode_GET_LENGTH(value);
770
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200771 /* sign is not allowed on strings */
772 if (format->sign != '\0') {
773 PyErr_SetString(PyExc_ValueError,
774 "Sign not allowed in string format specifier");
775 goto done;
776 }
777
778 /* alternate is not allowed on strings */
779 if (format->alternate) {
780 PyErr_SetString(PyExc_ValueError,
781 "Alternate form (#) not allowed in string format "
782 "specifier");
783 goto done;
784 }
785
786 /* '=' alignment not allowed on strings */
787 if (format->align == '=') {
788 PyErr_SetString(PyExc_ValueError,
789 "'=' alignment not allowed "
790 "in string format specifier");
791 goto done;
792 }
793
Victor Stinner621ef3d2012-10-02 00:33:47 +0200794 if ((format->width == -1 || format->width <= len)
795 && (format->precision == -1 || format->precision >= len)) {
Victor Stinnerd3f08822012-05-29 12:57:52 +0200796 /* Fast path */
797 return _PyUnicodeWriter_WriteStr(writer, value);
798 }
799
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200800 /* if precision is specified, output no more that format.precision
801 characters */
802 if (format->precision >= 0 && len >= format->precision) {
803 len = format->precision;
804 }
805
806 calc_padding(len, format->width, format->align, &lpad, &rpad, &total);
807
Victor Stinnereb4b5ac2013-04-03 02:02:33 +0200808 maxchar = writer->maxchar;
Victor Stinnera4ac6002012-01-21 15:50:49 +0100809 if (lpad != 0 || rpad != 0)
810 maxchar = Py_MAX(maxchar, format->fill_char);
Victor Stinnereb4b5ac2013-04-03 02:02:33 +0200811 if (PyUnicode_MAX_CHAR_VALUE(value) > maxchar) {
812 Py_UCS4 valmaxchar = _PyUnicode_FindMaxChar(value, 0, len);
813 maxchar = Py_MAX(maxchar, valmaxchar);
814 }
Victor Stinnera4ac6002012-01-21 15:50:49 +0100815
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200816 /* allocate the resulting string */
Victor Stinnerd3f08822012-05-29 12:57:52 +0200817 if (_PyUnicodeWriter_Prepare(writer, total, maxchar) == -1)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200818 goto done;
819
820 /* Write into that space. First the padding. */
Eric V. Smith2ea97122014-04-14 11:55:10 -0400821 result = fill_padding(writer, len, format->fill_char, lpad, rpad);
Victor Stinnerd3f08822012-05-29 12:57:52 +0200822 if (result == -1)
823 goto done;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200824
825 /* Then the source string. */
Victor Stinnerc9d369f2012-06-16 02:22:37 +0200826 if (len) {
827 _PyUnicode_FastCopyCharacters(writer->buffer, writer->pos,
828 value, 0, len);
829 }
Victor Stinnerd3f08822012-05-29 12:57:52 +0200830 writer->pos += (len + rpad);
831 result = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200832
833done:
834 return result;
835}
836
837
838/************************************************************************/
839/*********** long formatting ********************************************/
840/************************************************************************/
841
Victor Stinnerd3f08822012-05-29 12:57:52 +0200842static int
843format_long_internal(PyObject *value, const InternalFormatSpec *format,
844 _PyUnicodeWriter *writer)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200845{
Victor Stinnerd3f08822012-05-29 12:57:52 +0200846 int result = -1;
Amaury Forgeot d'Arccd27df32012-01-23 22:42:19 +0100847 Py_UCS4 maxchar = 127;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200848 PyObject *tmp = NULL;
849 Py_ssize_t inumeric_chars;
850 Py_UCS4 sign_char = '\0';
851 Py_ssize_t n_digits; /* count of digits need from the computed
852 string */
853 Py_ssize_t n_remainder = 0; /* Used only for 'c' formatting, which
854 produces non-digits */
855 Py_ssize_t n_prefix = 0; /* Count of prefix chars, (e.g., '0x') */
856 Py_ssize_t n_total;
Victor Stinnered277852012-02-01 00:22:23 +0100857 Py_ssize_t prefix = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200858 NumberFieldWidths spec;
859 long x;
860
861 /* Locale settings, either from the actual locale or
862 from a hard-code pseudo-locale */
Victor Stinner41a863c2012-02-24 00:37:51 +0100863 LocaleInfo locale = STATIC_LOCALE_INFO_INIT;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200864
865 /* no precision allowed on integers */
866 if (format->precision != -1) {
867 PyErr_SetString(PyExc_ValueError,
868 "Precision not allowed in integer format specifier");
869 goto done;
870 }
871
872 /* special case for character formatting */
873 if (format->type == 'c') {
874 /* error to specify a sign */
875 if (format->sign != '\0') {
876 PyErr_SetString(PyExc_ValueError,
877 "Sign not allowed with integer"
878 " format specifier 'c'");
879 goto done;
880 }
Eric V. Smitha12572f2014-04-15 22:37:55 -0400881 /* error to request alternate format */
882 if (format->alternate) {
883 PyErr_SetString(PyExc_ValueError,
884 "Alternate form (#) not allowed with integer"
885 " format specifier 'c'");
886 goto done;
887 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200888
889 /* taken from unicodeobject.c formatchar() */
890 /* Integer input truncated to a character */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200891 x = PyLong_AsLong(value);
892 if (x == -1 && PyErr_Occurred())
893 goto done;
894 if (x < 0 || x > 0x10ffff) {
895 PyErr_SetString(PyExc_OverflowError,
Victor Stinnera4ac6002012-01-21 15:50:49 +0100896 "%c arg not in range(0x110000)");
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200897 goto done;
898 }
899 tmp = PyUnicode_FromOrdinal(x);
900 inumeric_chars = 0;
901 n_digits = 1;
Amaury Forgeot d'Arc6d766fc2012-01-23 23:20:43 +0100902 maxchar = Py_MAX(maxchar, (Py_UCS4)x);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200903
904 /* As a sort-of hack, we tell calc_number_widths that we only
905 have "remainder" characters. calc_number_widths thinks
906 these are characters that don't get formatted, only copied
907 into the output string. We do this for 'c' formatting,
908 because the characters are likely to be non-digits. */
909 n_remainder = 1;
910 }
911 else {
912 int base;
913 int leading_chars_to_skip = 0; /* Number of characters added by
914 PyNumber_ToBase that we want to
915 skip over. */
916
917 /* Compute the base and how many characters will be added by
918 PyNumber_ToBase */
919 switch (format->type) {
920 case 'b':
921 base = 2;
922 leading_chars_to_skip = 2; /* 0b */
923 break;
924 case 'o':
925 base = 8;
926 leading_chars_to_skip = 2; /* 0o */
927 break;
928 case 'x':
929 case 'X':
930 base = 16;
931 leading_chars_to_skip = 2; /* 0x */
932 break;
933 default: /* shouldn't be needed, but stops a compiler warning */
934 case 'd':
935 case 'n':
936 base = 10;
937 break;
938 }
939
Victor Stinnerd3f08822012-05-29 12:57:52 +0200940 if (format->sign != '+' && format->sign != ' '
941 && format->width == -1
942 && format->type != 'X' && format->type != 'n'
943 && !format->thousands_separators
944 && PyLong_CheckExact(value))
945 {
946 /* Fast path */
947 return _PyLong_FormatWriter(writer, value, base, format->alternate);
948 }
949
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200950 /* The number of prefix chars is the same as the leading
951 chars to skip */
952 if (format->alternate)
953 n_prefix = leading_chars_to_skip;
954
955 /* Do the hard part, converting to a string in a given base */
Victor Stinnerd3f08822012-05-29 12:57:52 +0200956 tmp = _PyLong_Format(value, base);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200957 if (tmp == NULL || PyUnicode_READY(tmp) == -1)
958 goto done;
959
960 inumeric_chars = 0;
961 n_digits = PyUnicode_GET_LENGTH(tmp);
962
963 prefix = inumeric_chars;
964
965 /* Is a sign character present in the output? If so, remember it
966 and skip it */
967 if (PyUnicode_READ_CHAR(tmp, inumeric_chars) == '-') {
968 sign_char = '-';
969 ++prefix;
970 ++leading_chars_to_skip;
971 }
972
973 /* Skip over the leading chars (0x, 0b, etc.) */
974 n_digits -= leading_chars_to_skip;
975 inumeric_chars += leading_chars_to_skip;
976 }
977
978 /* Determine the grouping, separator, and decimal point, if any. */
Victor Stinner41a863c2012-02-24 00:37:51 +0100979 if (get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
Eric V. Smith89e1b1a2016-09-09 23:06:47 -0400980 format->thousands_separators,
Victor Stinner41a863c2012-02-24 00:37:51 +0100981 &locale) == -1)
982 goto done;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200983
984 /* Calculate how much memory we'll need. */
985 n_total = calc_number_widths(&spec, n_prefix, sign_char, tmp, inumeric_chars,
Victor Stinner41a863c2012-02-24 00:37:51 +0100986 inumeric_chars + n_digits, n_remainder, 0,
987 &locale, format, &maxchar);
Victor Stinner6f5fa1b2018-11-26 14:17:01 +0100988 if (n_total == -1) {
989 goto done;
990 }
Victor Stinnera4ac6002012-01-21 15:50:49 +0100991
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200992 /* Allocate the memory. */
Victor Stinnerd3f08822012-05-29 12:57:52 +0200993 if (_PyUnicodeWriter_Prepare(writer, n_total, maxchar) == -1)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200994 goto done;
995
996 /* Populate the memory. */
Victor Stinnerd3f08822012-05-29 12:57:52 +0200997 result = fill_number(writer, &spec,
998 tmp, inumeric_chars, inumeric_chars + n_digits,
Eric V. Smith2ea97122014-04-14 11:55:10 -0400999 tmp, prefix, format->fill_char,
Victor Stinnerd3f08822012-05-29 12:57:52 +02001000 &locale, format->type == 'X');
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001001
1002done:
1003 Py_XDECREF(tmp);
Victor Stinner41a863c2012-02-24 00:37:51 +01001004 free_locale_info(&locale);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001005 return result;
1006}
1007
1008/************************************************************************/
1009/*********** float formatting *******************************************/
1010/************************************************************************/
1011
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001012/* much of this is taken from unicodeobject.c */
Victor Stinnerd3f08822012-05-29 12:57:52 +02001013static int
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001014format_float_internal(PyObject *value,
Victor Stinnerd3f08822012-05-29 12:57:52 +02001015 const InternalFormatSpec *format,
1016 _PyUnicodeWriter *writer)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001017{
1018 char *buf = NULL; /* buffer returned from PyOS_double_to_string */
1019 Py_ssize_t n_digits;
1020 Py_ssize_t n_remainder;
1021 Py_ssize_t n_total;
1022 int has_decimal;
1023 double val;
Victor Stinner76d38502013-06-24 23:34:15 +02001024 int precision, default_precision = 6;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001025 Py_UCS4 type = format->type;
1026 int add_pct = 0;
1027 Py_ssize_t index;
1028 NumberFieldWidths spec;
1029 int flags = 0;
Victor Stinnerd3f08822012-05-29 12:57:52 +02001030 int result = -1;
Amaury Forgeot d'Arccd27df32012-01-23 22:42:19 +01001031 Py_UCS4 maxchar = 127;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001032 Py_UCS4 sign_char = '\0';
1033 int float_type; /* Used to see if we have a nan, inf, or regular float. */
1034 PyObject *unicode_tmp = NULL;
1035
1036 /* Locale settings, either from the actual locale or
1037 from a hard-code pseudo-locale */
Victor Stinner41a863c2012-02-24 00:37:51 +01001038 LocaleInfo locale = STATIC_LOCALE_INFO_INIT;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001039
Victor Stinner2f084ec2013-06-23 14:54:30 +02001040 if (format->precision > INT_MAX) {
1041 PyErr_SetString(PyExc_ValueError, "precision too big");
1042 goto done;
1043 }
1044 precision = (int)format->precision;
1045
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001046 if (format->alternate)
1047 flags |= Py_DTSF_ALT;
1048
1049 if (type == '\0') {
1050 /* Omitted type specifier. Behaves in the same way as repr(x)
1051 and str(x) if no precision is given, else like 'g', but with
1052 at least one digit after the decimal point. */
1053 flags |= Py_DTSF_ADD_DOT_0;
1054 type = 'r';
1055 default_precision = 0;
1056 }
1057
1058 if (type == 'n')
1059 /* 'n' is the same as 'g', except for the locale used to
1060 format the result. We take care of that later. */
1061 type = 'g';
1062
1063 val = PyFloat_AsDouble(value);
1064 if (val == -1.0 && PyErr_Occurred())
1065 goto done;
1066
1067 if (type == '%') {
1068 type = 'f';
1069 val *= 100;
1070 add_pct = 1;
1071 }
1072
1073 if (precision < 0)
1074 precision = default_precision;
1075 else if (type == 'r')
1076 type = 'g';
1077
Martin Panter4c359642016-05-08 13:53:41 +00001078 /* Cast "type", because if we're in unicode we need to pass an
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001079 8-bit char. This is safe, because we've restricted what "type"
1080 can be. */
1081 buf = PyOS_double_to_string(val, (char)type, precision, flags,
1082 &float_type);
1083 if (buf == NULL)
1084 goto done;
1085 n_digits = strlen(buf);
1086
1087 if (add_pct) {
1088 /* We know that buf has a trailing zero (since we just called
1089 strlen() on it), and we don't use that fact any more. So we
1090 can just write over the trailing zero. */
1091 buf[n_digits] = '%';
1092 n_digits += 1;
1093 }
1094
Victor Stinnerd3f08822012-05-29 12:57:52 +02001095 if (format->sign != '+' && format->sign != ' '
1096 && format->width == -1
1097 && format->type != 'n'
1098 && !format->thousands_separators)
1099 {
1100 /* Fast path */
Victor Stinner4a587072013-11-19 12:54:53 +01001101 result = _PyUnicodeWriter_WriteASCIIString(writer, buf, n_digits);
1102 PyMem_Free(buf);
Victor Stinnerd3f08822012-05-29 12:57:52 +02001103 return result;
1104 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001105
Victor Stinner4a587072013-11-19 12:54:53 +01001106 /* Since there is no unicode version of PyOS_double_to_string,
1107 just use the 8 bit version and then convert to unicode. */
1108 unicode_tmp = _PyUnicode_FromASCII(buf, n_digits);
1109 PyMem_Free(buf);
1110 if (unicode_tmp == NULL)
1111 goto done;
1112
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001113 /* Is a sign character present in the output? If so, remember it
1114 and skip it */
Victor Stinnerd3f08822012-05-29 12:57:52 +02001115 index = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001116 if (PyUnicode_READ_CHAR(unicode_tmp, index) == '-') {
1117 sign_char = '-';
1118 ++index;
1119 --n_digits;
1120 }
1121
1122 /* Determine if we have any "remainder" (after the digits, might include
1123 decimal or exponent or both (or neither)) */
1124 parse_number(unicode_tmp, index, index + n_digits, &n_remainder, &has_decimal);
1125
1126 /* Determine the grouping, separator, and decimal point, if any. */
Victor Stinner41a863c2012-02-24 00:37:51 +01001127 if (get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
Eric V. Smith89e1b1a2016-09-09 23:06:47 -04001128 format->thousands_separators,
Victor Stinner41a863c2012-02-24 00:37:51 +01001129 &locale) == -1)
1130 goto done;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001131
1132 /* Calculate how much memory we'll need. */
Victor Stinnerafbaa202011-09-28 21:50:16 +02001133 n_total = calc_number_widths(&spec, 0, sign_char, unicode_tmp, index,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001134 index + n_digits, n_remainder, has_decimal,
Victor Stinner41a863c2012-02-24 00:37:51 +01001135 &locale, format, &maxchar);
Victor Stinner6f5fa1b2018-11-26 14:17:01 +01001136 if (n_total == -1) {
1137 goto done;
1138 }
Victor Stinnera4ac6002012-01-21 15:50:49 +01001139
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001140 /* Allocate the memory. */
Victor Stinnerd3f08822012-05-29 12:57:52 +02001141 if (_PyUnicodeWriter_Prepare(writer, n_total, maxchar) == -1)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001142 goto done;
1143
1144 /* Populate the memory. */
Victor Stinnerd3f08822012-05-29 12:57:52 +02001145 result = fill_number(writer, &spec,
1146 unicode_tmp, index, index + n_digits,
Eric V. Smith2ea97122014-04-14 11:55:10 -04001147 NULL, 0, format->fill_char,
Victor Stinnerd3f08822012-05-29 12:57:52 +02001148 &locale, 0);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001149
1150done:
Stefan Krahd9c1bf72012-09-06 13:02:46 +02001151 Py_XDECREF(unicode_tmp);
Victor Stinner41a863c2012-02-24 00:37:51 +01001152 free_locale_info(&locale);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001153 return result;
1154}
1155
1156/************************************************************************/
1157/*********** complex formatting *****************************************/
1158/************************************************************************/
1159
Victor Stinnerd3f08822012-05-29 12:57:52 +02001160static int
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001161format_complex_internal(PyObject *value,
Victor Stinnerd3f08822012-05-29 12:57:52 +02001162 const InternalFormatSpec *format,
1163 _PyUnicodeWriter *writer)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001164{
1165 double re;
1166 double im;
1167 char *re_buf = NULL; /* buffer returned from PyOS_double_to_string */
1168 char *im_buf = NULL; /* buffer returned from PyOS_double_to_string */
1169
1170 InternalFormatSpec tmp_format = *format;
1171 Py_ssize_t n_re_digits;
1172 Py_ssize_t n_im_digits;
1173 Py_ssize_t n_re_remainder;
1174 Py_ssize_t n_im_remainder;
1175 Py_ssize_t n_re_total;
1176 Py_ssize_t n_im_total;
1177 int re_has_decimal;
1178 int im_has_decimal;
Victor Stinner76d38502013-06-24 23:34:15 +02001179 int precision, default_precision = 6;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001180 Py_UCS4 type = format->type;
1181 Py_ssize_t i_re;
1182 Py_ssize_t i_im;
1183 NumberFieldWidths re_spec;
1184 NumberFieldWidths im_spec;
1185 int flags = 0;
Victor Stinnerd3f08822012-05-29 12:57:52 +02001186 int result = -1;
Amaury Forgeot d'Arccd27df32012-01-23 22:42:19 +01001187 Py_UCS4 maxchar = 127;
Victor Stinnerd3f08822012-05-29 12:57:52 +02001188 enum PyUnicode_Kind rkind;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001189 void *rdata;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001190 Py_UCS4 re_sign_char = '\0';
1191 Py_UCS4 im_sign_char = '\0';
1192 int re_float_type; /* Used to see if we have a nan, inf, or regular float. */
1193 int im_float_type;
1194 int add_parens = 0;
1195 int skip_re = 0;
1196 Py_ssize_t lpad;
1197 Py_ssize_t rpad;
1198 Py_ssize_t total;
1199 PyObject *re_unicode_tmp = NULL;
1200 PyObject *im_unicode_tmp = NULL;
1201
1202 /* Locale settings, either from the actual locale or
1203 from a hard-code pseudo-locale */
Victor Stinner41a863c2012-02-24 00:37:51 +01001204 LocaleInfo locale = STATIC_LOCALE_INFO_INIT;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001205
Victor Stinner2f084ec2013-06-23 14:54:30 +02001206 if (format->precision > INT_MAX) {
1207 PyErr_SetString(PyExc_ValueError, "precision too big");
1208 goto done;
1209 }
1210 precision = (int)format->precision;
1211
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001212 /* Zero padding is not allowed. */
1213 if (format->fill_char == '0') {
1214 PyErr_SetString(PyExc_ValueError,
1215 "Zero padding is not allowed in complex format "
1216 "specifier");
1217 goto done;
1218 }
1219
1220 /* Neither is '=' alignment . */
1221 if (format->align == '=') {
1222 PyErr_SetString(PyExc_ValueError,
1223 "'=' alignment flag is not allowed in complex format "
1224 "specifier");
1225 goto done;
1226 }
1227
1228 re = PyComplex_RealAsDouble(value);
1229 if (re == -1.0 && PyErr_Occurred())
1230 goto done;
1231 im = PyComplex_ImagAsDouble(value);
1232 if (im == -1.0 && PyErr_Occurred())
1233 goto done;
1234
1235 if (format->alternate)
1236 flags |= Py_DTSF_ALT;
1237
1238 if (type == '\0') {
1239 /* Omitted type specifier. Should be like str(self). */
1240 type = 'r';
1241 default_precision = 0;
1242 if (re == 0.0 && copysign(1.0, re) == 1.0)
1243 skip_re = 1;
1244 else
1245 add_parens = 1;
1246 }
1247
1248 if (type == 'n')
1249 /* 'n' is the same as 'g', except for the locale used to
1250 format the result. We take care of that later. */
1251 type = 'g';
1252
1253 if (precision < 0)
1254 precision = default_precision;
1255 else if (type == 'r')
1256 type = 'g';
1257
Martin Panter4c359642016-05-08 13:53:41 +00001258 /* Cast "type", because if we're in unicode we need to pass an
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001259 8-bit char. This is safe, because we've restricted what "type"
1260 can be. */
1261 re_buf = PyOS_double_to_string(re, (char)type, precision, flags,
1262 &re_float_type);
1263 if (re_buf == NULL)
1264 goto done;
1265 im_buf = PyOS_double_to_string(im, (char)type, precision, flags,
1266 &im_float_type);
1267 if (im_buf == NULL)
1268 goto done;
1269
1270 n_re_digits = strlen(re_buf);
1271 n_im_digits = strlen(im_buf);
1272
1273 /* Since there is no unicode version of PyOS_double_to_string,
1274 just use the 8 bit version and then convert to unicode. */
Victor Stinnerd3f08822012-05-29 12:57:52 +02001275 re_unicode_tmp = _PyUnicode_FromASCII(re_buf, n_re_digits);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001276 if (re_unicode_tmp == NULL)
1277 goto done;
1278 i_re = 0;
1279
Victor Stinnerd3f08822012-05-29 12:57:52 +02001280 im_unicode_tmp = _PyUnicode_FromASCII(im_buf, n_im_digits);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001281 if (im_unicode_tmp == NULL)
1282 goto done;
1283 i_im = 0;
1284
1285 /* Is a sign character present in the output? If so, remember it
1286 and skip it */
1287 if (PyUnicode_READ_CHAR(re_unicode_tmp, i_re) == '-') {
1288 re_sign_char = '-';
1289 ++i_re;
1290 --n_re_digits;
1291 }
1292 if (PyUnicode_READ_CHAR(im_unicode_tmp, i_im) == '-') {
1293 im_sign_char = '-';
1294 ++i_im;
1295 --n_im_digits;
1296 }
1297
1298 /* Determine if we have any "remainder" (after the digits, might include
1299 decimal or exponent or both (or neither)) */
Victor Stinnerafbaa202011-09-28 21:50:16 +02001300 parse_number(re_unicode_tmp, i_re, i_re + n_re_digits,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001301 &n_re_remainder, &re_has_decimal);
Victor Stinnerafbaa202011-09-28 21:50:16 +02001302 parse_number(im_unicode_tmp, i_im, i_im + n_im_digits,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001303 &n_im_remainder, &im_has_decimal);
1304
1305 /* Determine the grouping, separator, and decimal point, if any. */
Victor Stinner41a863c2012-02-24 00:37:51 +01001306 if (get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
Eric V. Smith89e1b1a2016-09-09 23:06:47 -04001307 format->thousands_separators,
Victor Stinner41a863c2012-02-24 00:37:51 +01001308 &locale) == -1)
1309 goto done;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001310
1311 /* Turn off any padding. We'll do it later after we've composed
1312 the numbers without padding. */
1313 tmp_format.fill_char = '\0';
1314 tmp_format.align = '<';
1315 tmp_format.width = -1;
1316
1317 /* Calculate how much memory we'll need. */
1318 n_re_total = calc_number_widths(&re_spec, 0, re_sign_char, re_unicode_tmp,
1319 i_re, i_re + n_re_digits, n_re_remainder,
Victor Stinner41a863c2012-02-24 00:37:51 +01001320 re_has_decimal, &locale, &tmp_format,
1321 &maxchar);
Victor Stinner6f5fa1b2018-11-26 14:17:01 +01001322 if (n_re_total == -1) {
1323 goto done;
1324 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001325
1326 /* Same formatting, but always include a sign, unless the real part is
1327 * going to be omitted, in which case we use whatever sign convention was
1328 * requested by the original format. */
1329 if (!skip_re)
1330 tmp_format.sign = '+';
1331 n_im_total = calc_number_widths(&im_spec, 0, im_sign_char, im_unicode_tmp,
1332 i_im, i_im + n_im_digits, n_im_remainder,
Victor Stinner41a863c2012-02-24 00:37:51 +01001333 im_has_decimal, &locale, &tmp_format,
1334 &maxchar);
Victor Stinner6f5fa1b2018-11-26 14:17:01 +01001335 if (n_im_total == -1) {
1336 goto done;
1337 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001338
1339 if (skip_re)
1340 n_re_total = 0;
1341
1342 /* Add 1 for the 'j', and optionally 2 for parens. */
1343 calc_padding(n_re_total + n_im_total + 1 + add_parens * 2,
1344 format->width, format->align, &lpad, &rpad, &total);
1345
Victor Stinner41a863c2012-02-24 00:37:51 +01001346 if (lpad || rpad)
Victor Stinnera4ac6002012-01-21 15:50:49 +01001347 maxchar = Py_MAX(maxchar, format->fill_char);
1348
Victor Stinnerd3f08822012-05-29 12:57:52 +02001349 if (_PyUnicodeWriter_Prepare(writer, total, maxchar) == -1)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001350 goto done;
Victor Stinnerd3f08822012-05-29 12:57:52 +02001351 rkind = writer->kind;
1352 rdata = writer->data;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001353
1354 /* Populate the memory. First, the padding. */
Victor Stinnerd3f08822012-05-29 12:57:52 +02001355 result = fill_padding(writer,
1356 n_re_total + n_im_total + 1 + add_parens * 2,
Eric V. Smith2ea97122014-04-14 11:55:10 -04001357 format->fill_char, lpad, rpad);
Victor Stinnerd3f08822012-05-29 12:57:52 +02001358 if (result == -1)
1359 goto done;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001360
Victor Stinnerd3f08822012-05-29 12:57:52 +02001361 if (add_parens) {
1362 PyUnicode_WRITE(rkind, rdata, writer->pos, '(');
1363 writer->pos++;
1364 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001365
1366 if (!skip_re) {
Victor Stinnerd3f08822012-05-29 12:57:52 +02001367 result = fill_number(writer, &re_spec,
1368 re_unicode_tmp, i_re, i_re + n_re_digits,
1369 NULL, 0,
1370 0,
1371 &locale, 0);
1372 if (result == -1)
Victor Stinnerafbaa202011-09-28 21:50:16 +02001373 goto done;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001374 }
Victor Stinnerd3f08822012-05-29 12:57:52 +02001375 result = fill_number(writer, &im_spec,
1376 im_unicode_tmp, i_im, i_im + n_im_digits,
1377 NULL, 0,
1378 0,
1379 &locale, 0);
1380 if (result == -1)
Victor Stinnerafbaa202011-09-28 21:50:16 +02001381 goto done;
Victor Stinnerd3f08822012-05-29 12:57:52 +02001382 PyUnicode_WRITE(rkind, rdata, writer->pos, 'j');
1383 writer->pos++;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001384
Victor Stinnerd3f08822012-05-29 12:57:52 +02001385 if (add_parens) {
1386 PyUnicode_WRITE(rkind, rdata, writer->pos, ')');
1387 writer->pos++;
1388 }
1389
1390 writer->pos += rpad;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001391
1392done:
1393 PyMem_Free(re_buf);
1394 PyMem_Free(im_buf);
1395 Py_XDECREF(re_unicode_tmp);
1396 Py_XDECREF(im_unicode_tmp);
Victor Stinner41a863c2012-02-24 00:37:51 +01001397 free_locale_info(&locale);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001398 return result;
1399}
1400
1401/************************************************************************/
1402/*********** built in formatters ****************************************/
1403/************************************************************************/
doko@ubuntu.com39378f72012-06-21 12:12:20 +02001404static int
Victor Stinnerd3f08822012-05-29 12:57:52 +02001405format_obj(PyObject *obj, _PyUnicodeWriter *writer)
1406{
1407 PyObject *str;
1408 int err;
1409
1410 str = PyObject_Str(obj);
1411 if (str == NULL)
1412 return -1;
1413 err = _PyUnicodeWriter_WriteStr(writer, str);
1414 Py_DECREF(str);
1415 return err;
1416}
1417
1418int
1419_PyUnicode_FormatAdvancedWriter(_PyUnicodeWriter *writer,
1420 PyObject *obj,
1421 PyObject *format_spec,
1422 Py_ssize_t start, Py_ssize_t end)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001423{
1424 InternalFormatSpec format;
Victor Stinnerd3f08822012-05-29 12:57:52 +02001425
1426 assert(PyUnicode_Check(obj));
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001427
1428 /* check for the special case of zero length format spec, make
1429 it equivalent to str(obj) */
Victor Stinnerd3f08822012-05-29 12:57:52 +02001430 if (start == end) {
1431 if (PyUnicode_CheckExact(obj))
1432 return _PyUnicodeWriter_WriteStr(writer, obj);
1433 else
1434 return format_obj(obj, writer);
1435 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001436
1437 /* parse the format_spec */
1438 if (!parse_internal_render_format_spec(format_spec, start, end,
1439 &format, 's', '<'))
Victor Stinnerd3f08822012-05-29 12:57:52 +02001440 return -1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001441
1442 /* type conversion? */
1443 switch (format.type) {
1444 case 's':
1445 /* no type conversion needed, already a string. do the formatting */
Victor Stinnerd3f08822012-05-29 12:57:52 +02001446 return format_string_internal(obj, &format, writer);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001447 default:
1448 /* unknown */
1449 unknown_presentation_type(format.type, obj->ob_type->tp_name);
Victor Stinnerd3f08822012-05-29 12:57:52 +02001450 return -1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001451 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001452}
1453
Victor Stinnerd3f08822012-05-29 12:57:52 +02001454int
1455_PyLong_FormatAdvancedWriter(_PyUnicodeWriter *writer,
1456 PyObject *obj,
1457 PyObject *format_spec,
1458 Py_ssize_t start, Py_ssize_t end)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001459{
Victor Stinnerd3f08822012-05-29 12:57:52 +02001460 PyObject *tmp = NULL, *str = NULL;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001461 InternalFormatSpec format;
Victor Stinnerd3f08822012-05-29 12:57:52 +02001462 int result = -1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001463
1464 /* check for the special case of zero length format spec, make
1465 it equivalent to str(obj) */
1466 if (start == end) {
Victor Stinnerd3f08822012-05-29 12:57:52 +02001467 if (PyLong_CheckExact(obj))
1468 return _PyLong_FormatWriter(writer, obj, 10, 0);
1469 else
1470 return format_obj(obj, writer);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001471 }
1472
1473 /* parse the format_spec */
1474 if (!parse_internal_render_format_spec(format_spec, start, end,
1475 &format, 'd', '>'))
1476 goto done;
1477
1478 /* type conversion? */
1479 switch (format.type) {
1480 case 'b':
1481 case 'c':
1482 case 'd':
1483 case 'o':
1484 case 'x':
1485 case 'X':
1486 case 'n':
Serhiy Storchaka95949422013-08-27 19:40:23 +03001487 /* no type conversion needed, already an int. do the formatting */
Victor Stinnerd3f08822012-05-29 12:57:52 +02001488 result = format_long_internal(obj, &format, writer);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001489 break;
1490
1491 case 'e':
1492 case 'E':
1493 case 'f':
1494 case 'F':
1495 case 'g':
1496 case 'G':
1497 case '%':
1498 /* convert to float */
1499 tmp = PyNumber_Float(obj);
1500 if (tmp == NULL)
1501 goto done;
Victor Stinnerd3f08822012-05-29 12:57:52 +02001502 result = format_float_internal(tmp, &format, writer);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001503 break;
1504
1505 default:
1506 /* unknown */
1507 unknown_presentation_type(format.type, obj->ob_type->tp_name);
1508 goto done;
1509 }
1510
1511done:
1512 Py_XDECREF(tmp);
Victor Stinnerd3f08822012-05-29 12:57:52 +02001513 Py_XDECREF(str);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001514 return result;
1515}
1516
Victor Stinnerd3f08822012-05-29 12:57:52 +02001517int
1518_PyFloat_FormatAdvancedWriter(_PyUnicodeWriter *writer,
1519 PyObject *obj,
1520 PyObject *format_spec,
1521 Py_ssize_t start, Py_ssize_t end)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001522{
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001523 InternalFormatSpec format;
1524
1525 /* check for the special case of zero length format spec, make
1526 it equivalent to str(obj) */
Victor Stinnerd3f08822012-05-29 12:57:52 +02001527 if (start == end)
1528 return format_obj(obj, writer);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001529
1530 /* parse the format_spec */
1531 if (!parse_internal_render_format_spec(format_spec, start, end,
1532 &format, '\0', '>'))
Victor Stinnerd3f08822012-05-29 12:57:52 +02001533 return -1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001534
1535 /* type conversion? */
1536 switch (format.type) {
1537 case '\0': /* No format code: like 'g', but with at least one decimal. */
1538 case 'e':
1539 case 'E':
1540 case 'f':
1541 case 'F':
1542 case 'g':
1543 case 'G':
1544 case 'n':
1545 case '%':
1546 /* no conversion, already a float. do the formatting */
Victor Stinnerd3f08822012-05-29 12:57:52 +02001547 return format_float_internal(obj, &format, writer);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001548
1549 default:
1550 /* unknown */
1551 unknown_presentation_type(format.type, obj->ob_type->tp_name);
Victor Stinnerd3f08822012-05-29 12:57:52 +02001552 return -1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001553 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001554}
1555
Victor Stinnerd3f08822012-05-29 12:57:52 +02001556int
1557_PyComplex_FormatAdvancedWriter(_PyUnicodeWriter *writer,
1558 PyObject *obj,
1559 PyObject *format_spec,
1560 Py_ssize_t start, Py_ssize_t end)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001561{
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001562 InternalFormatSpec format;
1563
1564 /* check for the special case of zero length format spec, make
1565 it equivalent to str(obj) */
Victor Stinnerd3f08822012-05-29 12:57:52 +02001566 if (start == end)
1567 return format_obj(obj, writer);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001568
1569 /* parse the format_spec */
1570 if (!parse_internal_render_format_spec(format_spec, start, end,
1571 &format, '\0', '>'))
Victor Stinnerd3f08822012-05-29 12:57:52 +02001572 return -1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001573
1574 /* type conversion? */
1575 switch (format.type) {
1576 case '\0': /* No format code: like 'g', but with at least one decimal. */
1577 case 'e':
1578 case 'E':
1579 case 'f':
1580 case 'F':
1581 case 'g':
1582 case 'G':
1583 case 'n':
1584 /* no conversion, already a complex. do the formatting */
Victor Stinnerd3f08822012-05-29 12:57:52 +02001585 return format_complex_internal(obj, &format, writer);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001586
1587 default:
1588 /* unknown */
1589 unknown_presentation_type(format.type, obj->ob_type->tp_name);
Victor Stinnerd3f08822012-05-29 12:57:52 +02001590 return -1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001591 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001592}