blob: ed95f267d476c7bab41e74d81a34afb5fc96b395 [file] [log] [blame]
/* implements the unicode (as opposed to string) version of the
   built-in formatters for string, int, float.  that is, the versions
   of int.__format__, etc., that take and return unicode objects */
4
5#include "Python.h"
Victor Stinner02e6bf72018-11-20 16:20:16 +01006#include "pycore_fileutils.h"
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02007#include <locale.h>
8
9/* Raises an exception about an unknown presentation type for this
10 * type. */
11
12static void
13unknown_presentation_type(Py_UCS4 presentation_type,
14 const char* type_name)
15{
16 /* %c might be out-of-range, hence the two cases. */
17 if (presentation_type > 32 && presentation_type < 128)
18 PyErr_Format(PyExc_ValueError,
19 "Unknown format code '%c' "
20 "for object of type '%.200s'",
21 (char)presentation_type,
22 type_name);
23 else
24 PyErr_Format(PyExc_ValueError,
25 "Unknown format code '\\x%x' "
26 "for object of type '%.200s'",
27 (unsigned int)presentation_type,
28 type_name);
29}
30
31static void
Benjamin Petersoncbda8fc2018-10-01 21:54:39 -070032invalid_thousands_separator_type(char specifier, Py_UCS4 presentation_type)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020033{
Benjamin Petersoncbda8fc2018-10-01 21:54:39 -070034 assert(specifier == ',' || specifier == '_');
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020035 if (presentation_type > 32 && presentation_type < 128)
36 PyErr_Format(PyExc_ValueError,
Benjamin Petersoncbda8fc2018-10-01 21:54:39 -070037 "Cannot specify '%c' with '%c'.",
38 specifier, (char)presentation_type);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020039 else
40 PyErr_Format(PyExc_ValueError,
Benjamin Petersoncbda8fc2018-10-01 21:54:39 -070041 "Cannot specify '%c' with '\\x%x'.",
42 specifier, (unsigned int)presentation_type);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020043}
44
Eric V. Smith89e1b1a2016-09-09 23:06:47 -040045static void
Benjamin Petersoneb0dfa92016-09-09 20:14:05 -070046invalid_comma_and_underscore(void)
Eric V. Smith89e1b1a2016-09-09 23:06:47 -040047{
48 PyErr_Format(PyExc_ValueError, "Cannot specify both ',' and '_'.");
49}
50
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020051/*
52 get_integer consumes 0 or more decimal digit characters from an
53 input string, updates *result with the corresponding positive
54 integer, and returns the number of digits consumed.
55
56 returns -1 on error.
57*/
58static int
Serhiy Storchaka1f932612016-08-29 15:57:26 +030059get_integer(PyObject *str, Py_ssize_t *ppos, Py_ssize_t end,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020060 Py_ssize_t *result)
61{
Serhiy Storchaka1f932612016-08-29 15:57:26 +030062 Py_ssize_t accumulator, digitval, pos = *ppos;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020063 int numdigits;
Serhiy Storchaka1f932612016-08-29 15:57:26 +030064 int kind = PyUnicode_KIND(str);
Serhiy Storchakacd8295f2020-04-11 10:48:40 +030065 const void *data = PyUnicode_DATA(str);
Serhiy Storchaka1f932612016-08-29 15:57:26 +030066
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020067 accumulator = numdigits = 0;
Serhiy Storchaka1f932612016-08-29 15:57:26 +030068 for (; pos < end; pos++, numdigits++) {
69 digitval = Py_UNICODE_TODECIMAL(PyUnicode_READ(kind, data, pos));
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020070 if (digitval < 0)
71 break;
72 /*
Mark Dickinson47862d42011-12-01 15:27:04 +000073 Detect possible overflow before it happens:
74
75 accumulator * 10 + digitval > PY_SSIZE_T_MAX if and only if
76 accumulator > (PY_SSIZE_T_MAX - digitval) / 10.
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020077 */
Mark Dickinson47862d42011-12-01 15:27:04 +000078 if (accumulator > (PY_SSIZE_T_MAX - digitval) / 10) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020079 PyErr_Format(PyExc_ValueError,
80 "Too many decimal digits in format string");
Serhiy Storchaka1f932612016-08-29 15:57:26 +030081 *ppos = pos;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020082 return -1;
83 }
Mark Dickinson47862d42011-12-01 15:27:04 +000084 accumulator = accumulator * 10 + digitval;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020085 }
Serhiy Storchaka1f932612016-08-29 15:57:26 +030086 *ppos = pos;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020087 *result = accumulator;
88 return numdigits;
89}
90
91/************************************************************************/
92/*********** standard format specifier parsing **************************/
93/************************************************************************/
94
95/* returns true if this character is a specifier alignment token */
96Py_LOCAL_INLINE(int)
97is_alignment_token(Py_UCS4 c)
98{
99 switch (c) {
100 case '<': case '>': case '=': case '^':
101 return 1;
102 default:
103 return 0;
104 }
105}
106
107/* returns true if this character is a sign element */
108Py_LOCAL_INLINE(int)
109is_sign_element(Py_UCS4 c)
110{
111 switch (c) {
112 case ' ': case '+': case '-':
113 return 1;
114 default:
115 return 0;
116 }
117}
Eric Smith8c663262007-08-25 02:26:07 +0000118
/* Locale type codes.  LT_NO_LOCALE must be zero: the format spec parser
   tests the thousands_separators field for truth to find out whether any
   separator was requested. */
enum LocaleType {
    LT_NO_LOCALE = 0,            /* no grouping at all */
    /* The next two use the separator character itself as their value, so
       the value can be passed on as the 'specifier' character when
       reporting an invalid combination. */
    LT_DEFAULT_LOCALE = ',',     /* ',' separator */
    LT_UNDERSCORE_LOCALE = '_',  /* '_' separator */
    LT_UNDER_FOUR_LOCALE,        /* '_' separator, groups of four
                                    (bin/oct/hex) */
    LT_CURRENT_LOCALE            /* take the settings from localeconv() */
};
Eric Smith4a7d76d2008-05-30 18:10:19 +0000127
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200128typedef struct {
129 Py_UCS4 fill_char;
130 Py_UCS4 align;
131 int alternate;
132 Py_UCS4 sign;
133 Py_ssize_t width;
Benjamin Peterson995026a2016-09-13 22:46:15 -0700134 enum LocaleType thousands_separators;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200135 Py_ssize_t precision;
136 Py_UCS4 type;
137} InternalFormatSpec;
Eric Smith4a7d76d2008-05-30 18:10:19 +0000138
#if 0
/* Occasionally useful for debugging.  Should normally be compiled out.
   Each argument is cast so that it matches its printf conversion. */
static void
DEBUG_PRINT_FORMAT_SPEC(InternalFormatSpec *format)
{
    printf("internal format spec: fill_char %u\n",
           (unsigned int)format->fill_char);
    printf("internal format spec: align %u\n",
           (unsigned int)format->align);
    printf("internal format spec: alternate %d\n", format->alternate);
    printf("internal format spec: sign %u\n",
           (unsigned int)format->sign);
    printf("internal format spec: width %zd\n", format->width);
    printf("internal format spec: thousands_separators %d\n",
           (int)format->thousands_separators);
    printf("internal format spec: precision %zd\n", format->precision);
    printf("internal format spec: type %c\n", (int)format->type);
    printf("\n");
}
#endif
156
157
158/*
159 ptr points to the start of the format_spec, end points just past its end.
160 fills in format with the parsed information.
161 returns 1 on success, 0 on failure.
162 if failure, sets the exception
163*/
164static int
165parse_internal_render_format_spec(PyObject *format_spec,
166 Py_ssize_t start, Py_ssize_t end,
167 InternalFormatSpec *format,
168 char default_type,
169 char default_align)
170{
171 Py_ssize_t pos = start;
Serhiy Storchaka1f932612016-08-29 15:57:26 +0300172 int kind = PyUnicode_KIND(format_spec);
Serhiy Storchakacd8295f2020-04-11 10:48:40 +0300173 const void *data = PyUnicode_DATA(format_spec);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200174 /* end-pos is used throughout this code to specify the length of
175 the input string */
Serhiy Storchaka1f932612016-08-29 15:57:26 +0300176#define READ_spec(index) PyUnicode_READ(kind, data, index)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200177
178 Py_ssize_t consumed;
179 int align_specified = 0;
Eric V. Smith2ea97122014-04-14 11:55:10 -0400180 int fill_char_specified = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200181
Eric V. Smith2ea97122014-04-14 11:55:10 -0400182 format->fill_char = ' ';
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200183 format->align = default_align;
184 format->alternate = 0;
185 format->sign = '\0';
186 format->width = -1;
Benjamin Peterson995026a2016-09-13 22:46:15 -0700187 format->thousands_separators = LT_NO_LOCALE;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200188 format->precision = -1;
189 format->type = default_type;
190
191 /* If the second char is an alignment token,
192 then parse the fill char */
193 if (end-pos >= 2 && is_alignment_token(READ_spec(pos+1))) {
194 format->align = READ_spec(pos+1);
195 format->fill_char = READ_spec(pos);
Eric V. Smith2ea97122014-04-14 11:55:10 -0400196 fill_char_specified = 1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200197 align_specified = 1;
198 pos += 2;
199 }
200 else if (end-pos >= 1 && is_alignment_token(READ_spec(pos))) {
201 format->align = READ_spec(pos);
202 align_specified = 1;
203 ++pos;
204 }
205
206 /* Parse the various sign options */
207 if (end-pos >= 1 && is_sign_element(READ_spec(pos))) {
208 format->sign = READ_spec(pos);
209 ++pos;
210 }
211
212 /* If the next character is #, we're in alternate mode. This only
213 applies to integers. */
214 if (end-pos >= 1 && READ_spec(pos) == '#') {
215 format->alternate = 1;
216 ++pos;
217 }
218
219 /* The special case for 0-padding (backwards compat) */
Eric V. Smith2ea97122014-04-14 11:55:10 -0400220 if (!fill_char_specified && end-pos >= 1 && READ_spec(pos) == '0') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200221 format->fill_char = '0';
222 if (!align_specified) {
223 format->align = '=';
224 }
225 ++pos;
226 }
227
228 consumed = get_integer(format_spec, &pos, end, &format->width);
229 if (consumed == -1)
230 /* Overflow error. Exception already set. */
231 return 0;
232
233 /* If consumed is 0, we didn't consume any characters for the
234 width. In that case, reset the width to -1, because
235 get_integer() will have set it to zero. -1 is how we record
236 that the width wasn't specified. */
237 if (consumed == 0)
238 format->width = -1;
239
240 /* Comma signifies add thousands separators */
241 if (end-pos && READ_spec(pos) == ',') {
Eric V. Smith89e1b1a2016-09-09 23:06:47 -0400242 format->thousands_separators = LT_DEFAULT_LOCALE;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200243 ++pos;
244 }
Eric V. Smith89e1b1a2016-09-09 23:06:47 -0400245 /* Underscore signifies add thousands separators */
246 if (end-pos && READ_spec(pos) == '_') {
Benjamin Peterson995026a2016-09-13 22:46:15 -0700247 if (format->thousands_separators != LT_NO_LOCALE) {
Eric V. Smith89e1b1a2016-09-09 23:06:47 -0400248 invalid_comma_and_underscore();
249 return 0;
250 }
251 format->thousands_separators = LT_UNDERSCORE_LOCALE;
252 ++pos;
253 }
254 if (end-pos && READ_spec(pos) == ',') {
Miss Islington (bot)c16a2a12020-09-01 08:45:59 -0700255 if (format->thousands_separators == LT_UNDERSCORE_LOCALE) {
256 invalid_comma_and_underscore();
257 return 0;
258 }
Eric V. Smith89e1b1a2016-09-09 23:06:47 -0400259 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200260
261 /* Parse field precision */
262 if (end-pos && READ_spec(pos) == '.') {
263 ++pos;
264
265 consumed = get_integer(format_spec, &pos, end, &format->precision);
266 if (consumed == -1)
267 /* Overflow error. Exception already set. */
268 return 0;
269
270 /* Not having a precision after a dot is an error. */
271 if (consumed == 0) {
272 PyErr_Format(PyExc_ValueError,
273 "Format specifier missing precision");
274 return 0;
275 }
276
277 }
278
279 /* Finally, parse the type field. */
280
281 if (end-pos > 1) {
Eric V. Smithd25cfe62012-01-19 20:04:28 -0500282 /* More than one char remain, invalid format specifier. */
283 PyErr_Format(PyExc_ValueError, "Invalid format specifier");
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200284 return 0;
285 }
286
287 if (end-pos == 1) {
288 format->type = READ_spec(pos);
289 ++pos;
290 }
291
292 /* Do as much validating as we can, just by looking at the format
293 specifier. Do not take into account what type of formatting
294 we're doing (int, float, string). */
295
296 if (format->thousands_separators) {
297 switch (format->type) {
298 case 'd':
299 case 'e':
300 case 'f':
301 case 'g':
302 case 'E':
303 case 'G':
304 case '%':
305 case 'F':
306 case '\0':
307 /* These are allowed. See PEP 378.*/
308 break;
Eric V. Smith89e1b1a2016-09-09 23:06:47 -0400309 case 'b':
310 case 'o':
311 case 'x':
312 case 'X':
313 /* Underscores are allowed in bin/oct/hex. See PEP 515. */
314 if (format->thousands_separators == LT_UNDERSCORE_LOCALE) {
315 /* Every four digits, not every three, in bin/oct/hex. */
316 format->thousands_separators = LT_UNDER_FOUR_LOCALE;
317 break;
318 }
Stefan Krahf432a322017-08-21 13:09:59 +0200319 /* fall through */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200320 default:
Benjamin Petersoncbda8fc2018-10-01 21:54:39 -0700321 invalid_thousands_separator_type(format->thousands_separators, format->type);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200322 return 0;
323 }
324 }
325
Victor Stinnera4ac6002012-01-21 15:50:49 +0100326 assert (format->align <= 127);
327 assert (format->sign <= 127);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200328 return 1;
329}
330
331/* Calculate the padding needed. */
332static void
333calc_padding(Py_ssize_t nchars, Py_ssize_t width, Py_UCS4 align,
334 Py_ssize_t *n_lpadding, Py_ssize_t *n_rpadding,
335 Py_ssize_t *n_total)
336{
337 if (width >= 0) {
338 if (nchars > width)
339 *n_total = nchars;
340 else
341 *n_total = width;
342 }
343 else {
344 /* not specified, use all of the chars and no more */
345 *n_total = nchars;
346 }
347
348 /* Figure out how much leading space we need, based on the
349 aligning */
350 if (align == '>')
351 *n_lpadding = *n_total - nchars;
352 else if (align == '^')
353 *n_lpadding = (*n_total - nchars) / 2;
354 else if (align == '<' || align == '=')
355 *n_lpadding = 0;
356 else {
357 /* We should never have an unspecified alignment. */
Barry Warsawb2e57942017-09-14 18:13:16 -0700358 Py_UNREACHABLE();
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200359 }
360
361 *n_rpadding = *n_total - nchars - *n_lpadding;
362}
363
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200364/* Do the padding, and return a pointer to where the caller-supplied
365 content goes. */
Victor Stinner9ce59bb2013-05-17 00:04:56 +0200366static int
Victor Stinnerd3f08822012-05-29 12:57:52 +0200367fill_padding(_PyUnicodeWriter *writer,
368 Py_ssize_t nchars,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200369 Py_UCS4 fill_char, Py_ssize_t n_lpadding,
370 Py_ssize_t n_rpadding)
371{
Victor Stinnerd3f08822012-05-29 12:57:52 +0200372 Py_ssize_t pos;
373
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200374 /* Pad on left. */
Victor Stinnerd3f08822012-05-29 12:57:52 +0200375 if (n_lpadding) {
376 pos = writer->pos;
377 _PyUnicode_FastFill(writer->buffer, pos, n_lpadding, fill_char);
378 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200379
380 /* Pad on right. */
Victor Stinnerd3f08822012-05-29 12:57:52 +0200381 if (n_rpadding) {
382 pos = writer->pos + nchars + n_lpadding;
383 _PyUnicode_FastFill(writer->buffer, pos, n_rpadding, fill_char);
384 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200385
386 /* Pointer to the user content. */
Victor Stinnerd3f08822012-05-29 12:57:52 +0200387 writer->pos += n_lpadding;
388 return 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200389}
390
391/************************************************************************/
392/*********** common routines for numeric formatting *********************/
393/************************************************************************/
394
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200395/* Locale info needed for formatting integers and the part of floats
396 before and including the decimal. Note that locales only support
397 8-bit chars, not unicode. */
398typedef struct {
Victor Stinner41a863c2012-02-24 00:37:51 +0100399 PyObject *decimal_point;
400 PyObject *thousands_sep;
401 const char *grouping;
Victor Stinner02e6bf72018-11-20 16:20:16 +0100402 char *grouping_buffer;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200403} LocaleInfo;
404
Victor Stinner02e6bf72018-11-20 16:20:16 +0100405#define LocaleInfo_STATIC_INIT {0, 0, 0, 0}
Victor Stinner41a863c2012-02-24 00:37:51 +0100406
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200407/* describes the layout for an integer, see the comment in
408 calc_number_widths() for details */
409typedef struct {
410 Py_ssize_t n_lpadding;
411 Py_ssize_t n_prefix;
412 Py_ssize_t n_spadding;
413 Py_ssize_t n_rpadding;
414 char sign;
415 Py_ssize_t n_sign; /* number of digits needed for sign (0/1) */
416 Py_ssize_t n_grouped_digits; /* Space taken up by the digits, including
417 any grouping chars. */
418 Py_ssize_t n_decimal; /* 0 if only an integer */
419 Py_ssize_t n_remainder; /* Digits in decimal and/or exponent part,
420 excluding the decimal itself, if
421 present. */
422
423 /* These 2 are not the widths of fields, but are needed by
424 STRINGLIB_GROUPING. */
425 Py_ssize_t n_digits; /* The number of digits before a decimal
426 or exponent. */
427 Py_ssize_t n_min_width; /* The min_width we used when we computed
428 the n_grouped_digits width. */
429} NumberFieldWidths;
430
431
432/* Given a number of the form:
433 digits[remainder]
434 where ptr points to the start and end points to the end, find where
435 the integer part ends. This could be a decimal, an exponent, both,
436 or neither.
437 If a decimal point is present, set *has_decimal and increment
438 remainder beyond it.
439 Results are undefined (but shouldn't crash) for improperly
440 formatted strings.
441*/
442static void
443parse_number(PyObject *s, Py_ssize_t pos, Py_ssize_t end,
444 Py_ssize_t *n_remainder, int *has_decimal)
445{
446 Py_ssize_t remainder;
Serhiy Storchaka1f932612016-08-29 15:57:26 +0300447 int kind = PyUnicode_KIND(s);
Serhiy Storchakacd8295f2020-04-11 10:48:40 +0300448 const void *data = PyUnicode_DATA(s);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200449
Serhiy Storchaka1f932612016-08-29 15:57:26 +0300450 while (pos<end && Py_ISDIGIT(PyUnicode_READ(kind, data, pos)))
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200451 ++pos;
452 remainder = pos;
453
454 /* Does remainder start with a decimal point? */
Serhiy Storchaka1f932612016-08-29 15:57:26 +0300455 *has_decimal = pos<end && PyUnicode_READ(kind, data, remainder) == '.';
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200456
457 /* Skip the decimal point. */
458 if (*has_decimal)
459 remainder++;
460
461 *n_remainder = end - remainder;
462}
463
464/* not all fields of format are used. for example, precision is
465 unused. should this take discrete params in order to be more clear
466 about what it does? or is passing a single format parameter easier
Victor Stinner59423e32018-11-26 13:40:01 +0100467 and more efficient enough to justify a little obfuscation?
468 Return -1 on error. */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200469static Py_ssize_t
470calc_number_widths(NumberFieldWidths *spec, Py_ssize_t n_prefix,
Andy Lesterad0c7752020-03-07 11:29:10 -0600471 Py_UCS4 sign_char, Py_ssize_t n_start,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200472 Py_ssize_t n_end, Py_ssize_t n_remainder,
473 int has_decimal, const LocaleInfo *locale,
Victor Stinner41a863c2012-02-24 00:37:51 +0100474 const InternalFormatSpec *format, Py_UCS4 *maxchar)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200475{
476 Py_ssize_t n_non_digit_non_padding;
477 Py_ssize_t n_padding;
478
479 spec->n_digits = n_end - n_start - n_remainder - (has_decimal?1:0);
480 spec->n_lpadding = 0;
481 spec->n_prefix = n_prefix;
Victor Stinner41a863c2012-02-24 00:37:51 +0100482 spec->n_decimal = has_decimal ? PyUnicode_GET_LENGTH(locale->decimal_point) : 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200483 spec->n_remainder = n_remainder;
484 spec->n_spadding = 0;
485 spec->n_rpadding = 0;
486 spec->sign = '\0';
487 spec->n_sign = 0;
488
489 /* the output will look like:
490 | |
491 | <lpadding> <sign> <prefix> <spadding> <grouped_digits> <decimal> <remainder> <rpadding> |
492 | |
493
494 sign is computed from format->sign and the actual
495 sign of the number
496
497 prefix is given (it's for the '0x' prefix)
498
499 digits is already known
500
501 the total width is either given, or computed from the
502 actual digits
503
504 only one of lpadding, spadding, and rpadding can be non-zero,
505 and it's calculated from the width and other fields
506 */
507
508 /* compute the various parts we're going to write */
509 switch (format->sign) {
510 case '+':
511 /* always put a + or - */
512 spec->n_sign = 1;
513 spec->sign = (sign_char == '-' ? '-' : '+');
514 break;
515 case ' ':
516 spec->n_sign = 1;
517 spec->sign = (sign_char == '-' ? '-' : ' ');
518 break;
519 default:
520 /* Not specified, or the default (-) */
521 if (sign_char == '-') {
522 spec->n_sign = 1;
523 spec->sign = '-';
524 }
525 }
526
527 /* The number of chars used for non-digits and non-padding. */
528 n_non_digit_non_padding = spec->n_sign + spec->n_prefix + spec->n_decimal +
529 spec->n_remainder;
530
531 /* min_width can go negative, that's okay. format->width == -1 means
532 we don't care. */
533 if (format->fill_char == '0' && format->align == '=')
534 spec->n_min_width = format->width - n_non_digit_non_padding;
535 else
536 spec->n_min_width = 0;
537
538 if (spec->n_digits == 0)
539 /* This case only occurs when using 'c' formatting, we need
540 to special case it because the grouping code always wants
541 to have at least one character. */
542 spec->n_grouped_digits = 0;
Victor Stinner41a863c2012-02-24 00:37:51 +0100543 else {
544 Py_UCS4 grouping_maxchar;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200545 spec->n_grouped_digits = _PyUnicode_InsertThousandsGrouping(
Victor Stinner41a863c2012-02-24 00:37:51 +0100546 NULL, 0,
Victor Stinner59423e32018-11-26 13:40:01 +0100547 NULL, 0, spec->n_digits,
548 spec->n_min_width,
Victor Stinner41a863c2012-02-24 00:37:51 +0100549 locale->grouping, locale->thousands_sep, &grouping_maxchar);
Victor Stinner59423e32018-11-26 13:40:01 +0100550 if (spec->n_grouped_digits == -1) {
551 return -1;
552 }
Victor Stinner41a863c2012-02-24 00:37:51 +0100553 *maxchar = Py_MAX(*maxchar, grouping_maxchar);
554 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200555
556 /* Given the desired width and the total of digit and non-digit
557 space we consume, see if we need any padding. format->width can
558 be negative (meaning no padding), but this code still works in
559 that case. */
560 n_padding = format->width -
561 (n_non_digit_non_padding + spec->n_grouped_digits);
562 if (n_padding > 0) {
563 /* Some padding is needed. Determine if it's left, space, or right. */
564 switch (format->align) {
565 case '<':
566 spec->n_rpadding = n_padding;
567 break;
568 case '^':
569 spec->n_lpadding = n_padding / 2;
570 spec->n_rpadding = n_padding - spec->n_lpadding;
571 break;
572 case '=':
573 spec->n_spadding = n_padding;
574 break;
575 case '>':
576 spec->n_lpadding = n_padding;
577 break;
578 default:
Serhiy Storchakaeebaa9b2020-03-09 20:49:52 +0200579 /* Shouldn't get here */
Barry Warsawb2e57942017-09-14 18:13:16 -0700580 Py_UNREACHABLE();
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200581 }
582 }
Victor Stinner41a863c2012-02-24 00:37:51 +0100583
584 if (spec->n_lpadding || spec->n_spadding || spec->n_rpadding)
585 *maxchar = Py_MAX(*maxchar, format->fill_char);
586
Victor Stinner90f50d42012-02-24 01:44:47 +0100587 if (spec->n_decimal)
588 *maxchar = Py_MAX(*maxchar, PyUnicode_MAX_CHAR_VALUE(locale->decimal_point));
589
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200590 return spec->n_lpadding + spec->n_sign + spec->n_prefix +
591 spec->n_spadding + spec->n_grouped_digits + spec->n_decimal +
592 spec->n_remainder + spec->n_rpadding;
593}
594
Hansraj Das7320ec02019-10-25 22:14:02 +0530595/* Fill in the digit parts of a number's string representation,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200596 as determined in calc_number_widths().
Victor Stinnerafbaa202011-09-28 21:50:16 +0200597 Return -1 on error, or 0 on success. */
598static int
Victor Stinnerd3f08822012-05-29 12:57:52 +0200599fill_number(_PyUnicodeWriter *writer, const NumberFieldWidths *spec,
Andy Lesterad0c7752020-03-07 11:29:10 -0600600 PyObject *digits, Py_ssize_t d_start,
Victor Stinnerafbaa202011-09-28 21:50:16 +0200601 PyObject *prefix, Py_ssize_t p_start,
602 Py_UCS4 fill_char,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200603 LocaleInfo *locale, int toupper)
604{
605 /* Used to keep track of digits, decimal, and remainder. */
606 Py_ssize_t d_pos = d_start;
Victor Stinner22c103b2013-05-07 23:50:03 +0200607 const unsigned int kind = writer->kind;
Victor Stinnerd3f08822012-05-29 12:57:52 +0200608 const void *data = writer->data;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200609 Py_ssize_t r;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200610
611 if (spec->n_lpadding) {
Victor Stinnerd3f08822012-05-29 12:57:52 +0200612 _PyUnicode_FastFill(writer->buffer,
613 writer->pos, spec->n_lpadding, fill_char);
614 writer->pos += spec->n_lpadding;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200615 }
616 if (spec->n_sign == 1) {
Victor Stinnerd3f08822012-05-29 12:57:52 +0200617 PyUnicode_WRITE(kind, data, writer->pos, spec->sign);
618 writer->pos++;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200619 }
620 if (spec->n_prefix) {
Victor Stinnerd3f08822012-05-29 12:57:52 +0200621 _PyUnicode_FastCopyCharacters(writer->buffer, writer->pos,
622 prefix, p_start,
623 spec->n_prefix);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200624 if (toupper) {
625 Py_ssize_t t;
Benjamin Peterson21e0da22012-01-11 21:00:42 -0500626 for (t = 0; t < spec->n_prefix; t++) {
Victor Stinnerd3f08822012-05-29 12:57:52 +0200627 Py_UCS4 c = PyUnicode_READ(kind, data, writer->pos + t);
Victor Stinnered277852012-02-01 00:22:23 +0100628 c = Py_TOUPPER(c);
Victor Stinnera4ac6002012-01-21 15:50:49 +0100629 assert (c <= 127);
Victor Stinnerd3f08822012-05-29 12:57:52 +0200630 PyUnicode_WRITE(kind, data, writer->pos + t, c);
Benjamin Peterson21e0da22012-01-11 21:00:42 -0500631 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200632 }
Victor Stinnerd3f08822012-05-29 12:57:52 +0200633 writer->pos += spec->n_prefix;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200634 }
635 if (spec->n_spadding) {
Victor Stinnerd3f08822012-05-29 12:57:52 +0200636 _PyUnicode_FastFill(writer->buffer,
637 writer->pos, spec->n_spadding, fill_char);
638 writer->pos += spec->n_spadding;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200639 }
640
641 /* Only for type 'c' special case, it has no digits. */
642 if (spec->n_digits != 0) {
643 /* Fill the digits with InsertThousandsGrouping. */
Victor Stinner90f50d42012-02-24 01:44:47 +0100644 r = _PyUnicode_InsertThousandsGrouping(
Victor Stinner59423e32018-11-26 13:40:01 +0100645 writer, spec->n_grouped_digits,
646 digits, d_pos, spec->n_digits,
647 spec->n_min_width,
Victor Stinner41a863c2012-02-24 00:37:51 +0100648 locale->grouping, locale->thousands_sep, NULL);
Victor Stinner90f50d42012-02-24 01:44:47 +0100649 if (r == -1)
650 return -1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200651 assert(r == spec->n_grouped_digits);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200652 d_pos += spec->n_digits;
653 }
654 if (toupper) {
655 Py_ssize_t t;
Benjamin Peterson21e0da22012-01-11 21:00:42 -0500656 for (t = 0; t < spec->n_grouped_digits; t++) {
Victor Stinnerd3f08822012-05-29 12:57:52 +0200657 Py_UCS4 c = PyUnicode_READ(kind, data, writer->pos + t);
Victor Stinnered277852012-02-01 00:22:23 +0100658 c = Py_TOUPPER(c);
Benjamin Peterson21e0da22012-01-11 21:00:42 -0500659 if (c > 127) {
660 PyErr_SetString(PyExc_SystemError, "non-ascii grouped digit");
661 return -1;
662 }
Victor Stinnerd3f08822012-05-29 12:57:52 +0200663 PyUnicode_WRITE(kind, data, writer->pos + t, c);
Benjamin Peterson21e0da22012-01-11 21:00:42 -0500664 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200665 }
Victor Stinnerd3f08822012-05-29 12:57:52 +0200666 writer->pos += spec->n_grouped_digits;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200667
668 if (spec->n_decimal) {
Victor Stinnerd3f08822012-05-29 12:57:52 +0200669 _PyUnicode_FastCopyCharacters(
670 writer->buffer, writer->pos,
671 locale->decimal_point, 0, spec->n_decimal);
672 writer->pos += spec->n_decimal;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200673 d_pos += 1;
674 }
675
676 if (spec->n_remainder) {
Victor Stinnerd3f08822012-05-29 12:57:52 +0200677 _PyUnicode_FastCopyCharacters(
678 writer->buffer, writer->pos,
679 digits, d_pos, spec->n_remainder);
680 writer->pos += spec->n_remainder;
Brett Cannon8a250fa2012-06-25 16:13:44 -0400681 /* d_pos += spec->n_remainder; */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200682 }
683
684 if (spec->n_rpadding) {
Victor Stinnerd3f08822012-05-29 12:57:52 +0200685 _PyUnicode_FastFill(writer->buffer,
686 writer->pos, spec->n_rpadding,
687 fill_char);
688 writer->pos += spec->n_rpadding;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200689 }
Victor Stinnerafbaa202011-09-28 21:50:16 +0200690 return 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200691}
692
/* Grouping description used when no thousands separators are wanted.  In
   the localeconv() grouping convention an element equal to CHAR_MAX means
   that no further grouping is to be performed. */
static const char no_grouping[1] = {CHAR_MAX};
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200694
695/* Find the decimal point character(s?), thousands_separator(s?), and
696 grouping description, either for the current locale if type is
Eric V. Smith89e1b1a2016-09-09 23:06:47 -0400697 LT_CURRENT_LOCALE, a hard-coded locale if LT_DEFAULT_LOCALE or
698 LT_UNDERSCORE_LOCALE/LT_UNDER_FOUR_LOCALE, or none if LT_NO_LOCALE. */
Victor Stinner41a863c2012-02-24 00:37:51 +0100699static int
Benjamin Peterson59e5e0d2016-09-13 22:43:45 -0700700get_locale_info(enum LocaleType type, LocaleInfo *locale_info)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200701{
702 switch (type) {
703 case LT_CURRENT_LOCALE: {
Victor Stinner02e6bf72018-11-20 16:20:16 +0100704 struct lconv *lc = localeconv();
705 if (_Py_GetLocaleconvNumeric(lc,
706 &locale_info->decimal_point,
707 &locale_info->thousands_sep) < 0) {
Victor Stinner41a863c2012-02-24 00:37:51 +0100708 return -1;
Victor Stinnercb064fc2018-01-15 15:58:02 +0100709 }
Victor Stinner02e6bf72018-11-20 16:20:16 +0100710
711 /* localeconv() grouping can become a dangling pointer or point
712 to a different string if another thread calls localeconv() during
713 the string formatting. Copy the string to avoid this risk. */
714 locale_info->grouping_buffer = _PyMem_Strdup(lc->grouping);
715 if (locale_info->grouping_buffer == NULL) {
716 PyErr_NoMemory();
717 return -1;
718 }
719 locale_info->grouping = locale_info->grouping_buffer;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200720 break;
721 }
722 case LT_DEFAULT_LOCALE:
Eric V. Smith89e1b1a2016-09-09 23:06:47 -0400723 case LT_UNDERSCORE_LOCALE:
724 case LT_UNDER_FOUR_LOCALE:
Victor Stinner41a863c2012-02-24 00:37:51 +0100725 locale_info->decimal_point = PyUnicode_FromOrdinal('.');
Eric V. Smith89e1b1a2016-09-09 23:06:47 -0400726 locale_info->thousands_sep = PyUnicode_FromOrdinal(
727 type == LT_DEFAULT_LOCALE ? ',' : '_');
Benjamin Peterson59e5e0d2016-09-13 22:43:45 -0700728 if (!locale_info->decimal_point || !locale_info->thousands_sep)
Victor Stinner41a863c2012-02-24 00:37:51 +0100729 return -1;
Eric V. Smith89e1b1a2016-09-09 23:06:47 -0400730 if (type != LT_UNDER_FOUR_LOCALE)
731 locale_info->grouping = "\3"; /* Group every 3 characters. The
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200732 (implicit) trailing 0 means repeat
733 infinitely. */
Eric V. Smith89e1b1a2016-09-09 23:06:47 -0400734 else
735 locale_info->grouping = "\4"; /* Bin/oct/hex group every four. */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200736 break;
737 case LT_NO_LOCALE:
Victor Stinner41a863c2012-02-24 00:37:51 +0100738 locale_info->decimal_point = PyUnicode_FromOrdinal('.');
739 locale_info->thousands_sep = PyUnicode_New(0, 0);
Benjamin Peterson59e5e0d2016-09-13 22:43:45 -0700740 if (!locale_info->decimal_point || !locale_info->thousands_sep)
Victor Stinner41a863c2012-02-24 00:37:51 +0100741 return -1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200742 locale_info->grouping = no_grouping;
743 break;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200744 }
Victor Stinner41a863c2012-02-24 00:37:51 +0100745 return 0;
746}
747
748static void
749free_locale_info(LocaleInfo *locale_info)
750{
751 Py_XDECREF(locale_info->decimal_point);
752 Py_XDECREF(locale_info->thousands_sep);
Victor Stinner02e6bf72018-11-20 16:20:16 +0100753 PyMem_Free(locale_info->grouping_buffer);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200754}
755
756/************************************************************************/
757/*********** string formatting ******************************************/
758/************************************************************************/
759
Victor Stinnerd3f08822012-05-29 12:57:52 +0200760static int
761format_string_internal(PyObject *value, const InternalFormatSpec *format,
762 _PyUnicodeWriter *writer)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200763{
764 Py_ssize_t lpad;
765 Py_ssize_t rpad;
766 Py_ssize_t total;
Victor Stinnerd3f08822012-05-29 12:57:52 +0200767 Py_ssize_t len;
768 int result = -1;
Victor Stinnerece58de2012-04-23 23:36:38 +0200769 Py_UCS4 maxchar;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200770
Victor Stinnerd3f08822012-05-29 12:57:52 +0200771 assert(PyUnicode_IS_READY(value));
772 len = PyUnicode_GET_LENGTH(value);
773
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200774 /* sign is not allowed on strings */
775 if (format->sign != '\0') {
776 PyErr_SetString(PyExc_ValueError,
777 "Sign not allowed in string format specifier");
778 goto done;
779 }
780
781 /* alternate is not allowed on strings */
782 if (format->alternate) {
783 PyErr_SetString(PyExc_ValueError,
784 "Alternate form (#) not allowed in string format "
785 "specifier");
786 goto done;
787 }
788
789 /* '=' alignment not allowed on strings */
790 if (format->align == '=') {
791 PyErr_SetString(PyExc_ValueError,
792 "'=' alignment not allowed "
793 "in string format specifier");
794 goto done;
795 }
796
Victor Stinner621ef3d2012-10-02 00:33:47 +0200797 if ((format->width == -1 || format->width <= len)
798 && (format->precision == -1 || format->precision >= len)) {
Victor Stinnerd3f08822012-05-29 12:57:52 +0200799 /* Fast path */
800 return _PyUnicodeWriter_WriteStr(writer, value);
801 }
802
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200803 /* if precision is specified, output no more that format.precision
804 characters */
805 if (format->precision >= 0 && len >= format->precision) {
806 len = format->precision;
807 }
808
809 calc_padding(len, format->width, format->align, &lpad, &rpad, &total);
810
Victor Stinnereb4b5ac2013-04-03 02:02:33 +0200811 maxchar = writer->maxchar;
Victor Stinnera4ac6002012-01-21 15:50:49 +0100812 if (lpad != 0 || rpad != 0)
813 maxchar = Py_MAX(maxchar, format->fill_char);
Victor Stinnereb4b5ac2013-04-03 02:02:33 +0200814 if (PyUnicode_MAX_CHAR_VALUE(value) > maxchar) {
815 Py_UCS4 valmaxchar = _PyUnicode_FindMaxChar(value, 0, len);
816 maxchar = Py_MAX(maxchar, valmaxchar);
817 }
Victor Stinnera4ac6002012-01-21 15:50:49 +0100818
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200819 /* allocate the resulting string */
Victor Stinnerd3f08822012-05-29 12:57:52 +0200820 if (_PyUnicodeWriter_Prepare(writer, total, maxchar) == -1)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200821 goto done;
822
823 /* Write into that space. First the padding. */
Eric V. Smith2ea97122014-04-14 11:55:10 -0400824 result = fill_padding(writer, len, format->fill_char, lpad, rpad);
Victor Stinnerd3f08822012-05-29 12:57:52 +0200825 if (result == -1)
826 goto done;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200827
828 /* Then the source string. */
Victor Stinnerc9d369f2012-06-16 02:22:37 +0200829 if (len) {
830 _PyUnicode_FastCopyCharacters(writer->buffer, writer->pos,
831 value, 0, len);
832 }
Victor Stinnerd3f08822012-05-29 12:57:52 +0200833 writer->pos += (len + rpad);
834 result = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200835
836done:
837 return result;
838}
839
840
841/************************************************************************/
842/*********** long formatting ********************************************/
843/************************************************************************/
844
Victor Stinnerd3f08822012-05-29 12:57:52 +0200845static int
846format_long_internal(PyObject *value, const InternalFormatSpec *format,
847 _PyUnicodeWriter *writer)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200848{
Victor Stinnerd3f08822012-05-29 12:57:52 +0200849 int result = -1;
Amaury Forgeot d'Arccd27df32012-01-23 22:42:19 +0100850 Py_UCS4 maxchar = 127;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200851 PyObject *tmp = NULL;
852 Py_ssize_t inumeric_chars;
853 Py_UCS4 sign_char = '\0';
854 Py_ssize_t n_digits; /* count of digits need from the computed
855 string */
856 Py_ssize_t n_remainder = 0; /* Used only for 'c' formatting, which
857 produces non-digits */
858 Py_ssize_t n_prefix = 0; /* Count of prefix chars, (e.g., '0x') */
859 Py_ssize_t n_total;
Victor Stinnered277852012-02-01 00:22:23 +0100860 Py_ssize_t prefix = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200861 NumberFieldWidths spec;
862 long x;
863
864 /* Locale settings, either from the actual locale or
865 from a hard-code pseudo-locale */
Victor Stinner02e6bf72018-11-20 16:20:16 +0100866 LocaleInfo locale = LocaleInfo_STATIC_INIT;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200867
868 /* no precision allowed on integers */
869 if (format->precision != -1) {
870 PyErr_SetString(PyExc_ValueError,
871 "Precision not allowed in integer format specifier");
872 goto done;
873 }
874
875 /* special case for character formatting */
876 if (format->type == 'c') {
877 /* error to specify a sign */
878 if (format->sign != '\0') {
879 PyErr_SetString(PyExc_ValueError,
880 "Sign not allowed with integer"
881 " format specifier 'c'");
882 goto done;
883 }
Eric V. Smitha12572f2014-04-15 22:37:55 -0400884 /* error to request alternate format */
885 if (format->alternate) {
886 PyErr_SetString(PyExc_ValueError,
887 "Alternate form (#) not allowed with integer"
888 " format specifier 'c'");
889 goto done;
890 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200891
892 /* taken from unicodeobject.c formatchar() */
893 /* Integer input truncated to a character */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200894 x = PyLong_AsLong(value);
895 if (x == -1 && PyErr_Occurred())
896 goto done;
897 if (x < 0 || x > 0x10ffff) {
898 PyErr_SetString(PyExc_OverflowError,
Victor Stinnera4ac6002012-01-21 15:50:49 +0100899 "%c arg not in range(0x110000)");
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200900 goto done;
901 }
902 tmp = PyUnicode_FromOrdinal(x);
903 inumeric_chars = 0;
904 n_digits = 1;
Amaury Forgeot d'Arc6d766fc2012-01-23 23:20:43 +0100905 maxchar = Py_MAX(maxchar, (Py_UCS4)x);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200906
907 /* As a sort-of hack, we tell calc_number_widths that we only
908 have "remainder" characters. calc_number_widths thinks
909 these are characters that don't get formatted, only copied
910 into the output string. We do this for 'c' formatting,
911 because the characters are likely to be non-digits. */
912 n_remainder = 1;
913 }
914 else {
915 int base;
916 int leading_chars_to_skip = 0; /* Number of characters added by
917 PyNumber_ToBase that we want to
918 skip over. */
919
920 /* Compute the base and how many characters will be added by
921 PyNumber_ToBase */
922 switch (format->type) {
923 case 'b':
924 base = 2;
925 leading_chars_to_skip = 2; /* 0b */
926 break;
927 case 'o':
928 base = 8;
929 leading_chars_to_skip = 2; /* 0o */
930 break;
931 case 'x':
932 case 'X':
933 base = 16;
934 leading_chars_to_skip = 2; /* 0x */
935 break;
936 default: /* shouldn't be needed, but stops a compiler warning */
937 case 'd':
938 case 'n':
939 base = 10;
940 break;
941 }
942
Victor Stinnerd3f08822012-05-29 12:57:52 +0200943 if (format->sign != '+' && format->sign != ' '
944 && format->width == -1
945 && format->type != 'X' && format->type != 'n'
946 && !format->thousands_separators
947 && PyLong_CheckExact(value))
948 {
949 /* Fast path */
950 return _PyLong_FormatWriter(writer, value, base, format->alternate);
951 }
952
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200953 /* The number of prefix chars is the same as the leading
954 chars to skip */
955 if (format->alternate)
956 n_prefix = leading_chars_to_skip;
957
958 /* Do the hard part, converting to a string in a given base */
Victor Stinnerd3f08822012-05-29 12:57:52 +0200959 tmp = _PyLong_Format(value, base);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200960 if (tmp == NULL || PyUnicode_READY(tmp) == -1)
961 goto done;
962
963 inumeric_chars = 0;
964 n_digits = PyUnicode_GET_LENGTH(tmp);
965
966 prefix = inumeric_chars;
967
968 /* Is a sign character present in the output? If so, remember it
969 and skip it */
970 if (PyUnicode_READ_CHAR(tmp, inumeric_chars) == '-') {
971 sign_char = '-';
972 ++prefix;
973 ++leading_chars_to_skip;
974 }
975
976 /* Skip over the leading chars (0x, 0b, etc.) */
977 n_digits -= leading_chars_to_skip;
978 inumeric_chars += leading_chars_to_skip;
979 }
980
981 /* Determine the grouping, separator, and decimal point, if any. */
Victor Stinner41a863c2012-02-24 00:37:51 +0100982 if (get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
Eric V. Smith89e1b1a2016-09-09 23:06:47 -0400983 format->thousands_separators,
Victor Stinner41a863c2012-02-24 00:37:51 +0100984 &locale) == -1)
985 goto done;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200986
987 /* Calculate how much memory we'll need. */
Andy Lesterad0c7752020-03-07 11:29:10 -0600988 n_total = calc_number_widths(&spec, n_prefix, sign_char, inumeric_chars,
Victor Stinner41a863c2012-02-24 00:37:51 +0100989 inumeric_chars + n_digits, n_remainder, 0,
990 &locale, format, &maxchar);
Victor Stinner59423e32018-11-26 13:40:01 +0100991 if (n_total == -1) {
992 goto done;
993 }
Victor Stinnera4ac6002012-01-21 15:50:49 +0100994
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200995 /* Allocate the memory. */
Victor Stinnerd3f08822012-05-29 12:57:52 +0200996 if (_PyUnicodeWriter_Prepare(writer, n_total, maxchar) == -1)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200997 goto done;
998
999 /* Populate the memory. */
Victor Stinnerd3f08822012-05-29 12:57:52 +02001000 result = fill_number(writer, &spec,
Andy Lesterad0c7752020-03-07 11:29:10 -06001001 tmp, inumeric_chars,
Eric V. Smith2ea97122014-04-14 11:55:10 -04001002 tmp, prefix, format->fill_char,
Victor Stinnerd3f08822012-05-29 12:57:52 +02001003 &locale, format->type == 'X');
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001004
1005done:
1006 Py_XDECREF(tmp);
Victor Stinner41a863c2012-02-24 00:37:51 +01001007 free_locale_info(&locale);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001008 return result;
1009}
1010
1011/************************************************************************/
1012/*********** float formatting *******************************************/
1013/************************************************************************/
1014
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001015/* much of this is taken from unicodeobject.c */
Victor Stinnerd3f08822012-05-29 12:57:52 +02001016static int
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001017format_float_internal(PyObject *value,
Victor Stinnerd3f08822012-05-29 12:57:52 +02001018 const InternalFormatSpec *format,
1019 _PyUnicodeWriter *writer)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001020{
1021 char *buf = NULL; /* buffer returned from PyOS_double_to_string */
1022 Py_ssize_t n_digits;
1023 Py_ssize_t n_remainder;
1024 Py_ssize_t n_total;
1025 int has_decimal;
1026 double val;
Victor Stinner76d38502013-06-24 23:34:15 +02001027 int precision, default_precision = 6;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001028 Py_UCS4 type = format->type;
1029 int add_pct = 0;
1030 Py_ssize_t index;
1031 NumberFieldWidths spec;
1032 int flags = 0;
Victor Stinnerd3f08822012-05-29 12:57:52 +02001033 int result = -1;
Amaury Forgeot d'Arccd27df32012-01-23 22:42:19 +01001034 Py_UCS4 maxchar = 127;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001035 Py_UCS4 sign_char = '\0';
1036 int float_type; /* Used to see if we have a nan, inf, or regular float. */
1037 PyObject *unicode_tmp = NULL;
1038
1039 /* Locale settings, either from the actual locale or
1040 from a hard-code pseudo-locale */
Victor Stinner02e6bf72018-11-20 16:20:16 +01001041 LocaleInfo locale = LocaleInfo_STATIC_INIT;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001042
Victor Stinner2f084ec2013-06-23 14:54:30 +02001043 if (format->precision > INT_MAX) {
1044 PyErr_SetString(PyExc_ValueError, "precision too big");
1045 goto done;
1046 }
1047 precision = (int)format->precision;
1048
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001049 if (format->alternate)
1050 flags |= Py_DTSF_ALT;
1051
1052 if (type == '\0') {
1053 /* Omitted type specifier. Behaves in the same way as repr(x)
1054 and str(x) if no precision is given, else like 'g', but with
1055 at least one digit after the decimal point. */
1056 flags |= Py_DTSF_ADD_DOT_0;
1057 type = 'r';
1058 default_precision = 0;
1059 }
1060
1061 if (type == 'n')
1062 /* 'n' is the same as 'g', except for the locale used to
1063 format the result. We take care of that later. */
1064 type = 'g';
1065
1066 val = PyFloat_AsDouble(value);
1067 if (val == -1.0 && PyErr_Occurred())
1068 goto done;
1069
1070 if (type == '%') {
1071 type = 'f';
1072 val *= 100;
1073 add_pct = 1;
1074 }
1075
1076 if (precision < 0)
1077 precision = default_precision;
1078 else if (type == 'r')
1079 type = 'g';
1080
Martin Panter4c359642016-05-08 13:53:41 +00001081 /* Cast "type", because if we're in unicode we need to pass an
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001082 8-bit char. This is safe, because we've restricted what "type"
1083 can be. */
1084 buf = PyOS_double_to_string(val, (char)type, precision, flags,
1085 &float_type);
1086 if (buf == NULL)
1087 goto done;
1088 n_digits = strlen(buf);
1089
1090 if (add_pct) {
1091 /* We know that buf has a trailing zero (since we just called
1092 strlen() on it), and we don't use that fact any more. So we
1093 can just write over the trailing zero. */
1094 buf[n_digits] = '%';
1095 n_digits += 1;
1096 }
1097
Victor Stinnerd3f08822012-05-29 12:57:52 +02001098 if (format->sign != '+' && format->sign != ' '
1099 && format->width == -1
1100 && format->type != 'n'
1101 && !format->thousands_separators)
1102 {
1103 /* Fast path */
Victor Stinner4a587072013-11-19 12:54:53 +01001104 result = _PyUnicodeWriter_WriteASCIIString(writer, buf, n_digits);
1105 PyMem_Free(buf);
Victor Stinnerd3f08822012-05-29 12:57:52 +02001106 return result;
1107 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001108
Victor Stinner4a587072013-11-19 12:54:53 +01001109 /* Since there is no unicode version of PyOS_double_to_string,
1110 just use the 8 bit version and then convert to unicode. */
1111 unicode_tmp = _PyUnicode_FromASCII(buf, n_digits);
1112 PyMem_Free(buf);
1113 if (unicode_tmp == NULL)
1114 goto done;
1115
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001116 /* Is a sign character present in the output? If so, remember it
1117 and skip it */
Victor Stinnerd3f08822012-05-29 12:57:52 +02001118 index = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001119 if (PyUnicode_READ_CHAR(unicode_tmp, index) == '-') {
1120 sign_char = '-';
1121 ++index;
1122 --n_digits;
1123 }
1124
1125 /* Determine if we have any "remainder" (after the digits, might include
1126 decimal or exponent or both (or neither)) */
1127 parse_number(unicode_tmp, index, index + n_digits, &n_remainder, &has_decimal);
1128
1129 /* Determine the grouping, separator, and decimal point, if any. */
Victor Stinner41a863c2012-02-24 00:37:51 +01001130 if (get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
Eric V. Smith89e1b1a2016-09-09 23:06:47 -04001131 format->thousands_separators,
Victor Stinner41a863c2012-02-24 00:37:51 +01001132 &locale) == -1)
1133 goto done;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001134
1135 /* Calculate how much memory we'll need. */
Andy Lesterad0c7752020-03-07 11:29:10 -06001136 n_total = calc_number_widths(&spec, 0, sign_char, index,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001137 index + n_digits, n_remainder, has_decimal,
Victor Stinner41a863c2012-02-24 00:37:51 +01001138 &locale, format, &maxchar);
Victor Stinner59423e32018-11-26 13:40:01 +01001139 if (n_total == -1) {
1140 goto done;
1141 }
Victor Stinnera4ac6002012-01-21 15:50:49 +01001142
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001143 /* Allocate the memory. */
Victor Stinnerd3f08822012-05-29 12:57:52 +02001144 if (_PyUnicodeWriter_Prepare(writer, n_total, maxchar) == -1)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001145 goto done;
1146
1147 /* Populate the memory. */
Victor Stinnerd3f08822012-05-29 12:57:52 +02001148 result = fill_number(writer, &spec,
Andy Lesterad0c7752020-03-07 11:29:10 -06001149 unicode_tmp, index,
Eric V. Smith2ea97122014-04-14 11:55:10 -04001150 NULL, 0, format->fill_char,
Victor Stinnerd3f08822012-05-29 12:57:52 +02001151 &locale, 0);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001152
1153done:
Stefan Krahd9c1bf72012-09-06 13:02:46 +02001154 Py_XDECREF(unicode_tmp);
Victor Stinner41a863c2012-02-24 00:37:51 +01001155 free_locale_info(&locale);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001156 return result;
1157}
1158
1159/************************************************************************/
1160/*********** complex formatting *****************************************/
1161/************************************************************************/
1162
Victor Stinnerd3f08822012-05-29 12:57:52 +02001163static int
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001164format_complex_internal(PyObject *value,
Victor Stinnerd3f08822012-05-29 12:57:52 +02001165 const InternalFormatSpec *format,
1166 _PyUnicodeWriter *writer)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001167{
1168 double re;
1169 double im;
1170 char *re_buf = NULL; /* buffer returned from PyOS_double_to_string */
1171 char *im_buf = NULL; /* buffer returned from PyOS_double_to_string */
1172
1173 InternalFormatSpec tmp_format = *format;
1174 Py_ssize_t n_re_digits;
1175 Py_ssize_t n_im_digits;
1176 Py_ssize_t n_re_remainder;
1177 Py_ssize_t n_im_remainder;
1178 Py_ssize_t n_re_total;
1179 Py_ssize_t n_im_total;
1180 int re_has_decimal;
1181 int im_has_decimal;
Victor Stinner76d38502013-06-24 23:34:15 +02001182 int precision, default_precision = 6;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001183 Py_UCS4 type = format->type;
1184 Py_ssize_t i_re;
1185 Py_ssize_t i_im;
1186 NumberFieldWidths re_spec;
1187 NumberFieldWidths im_spec;
1188 int flags = 0;
Victor Stinnerd3f08822012-05-29 12:57:52 +02001189 int result = -1;
Amaury Forgeot d'Arccd27df32012-01-23 22:42:19 +01001190 Py_UCS4 maxchar = 127;
Victor Stinnerd3f08822012-05-29 12:57:52 +02001191 enum PyUnicode_Kind rkind;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001192 void *rdata;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001193 Py_UCS4 re_sign_char = '\0';
1194 Py_UCS4 im_sign_char = '\0';
1195 int re_float_type; /* Used to see if we have a nan, inf, or regular float. */
1196 int im_float_type;
1197 int add_parens = 0;
1198 int skip_re = 0;
1199 Py_ssize_t lpad;
1200 Py_ssize_t rpad;
1201 Py_ssize_t total;
1202 PyObject *re_unicode_tmp = NULL;
1203 PyObject *im_unicode_tmp = NULL;
1204
1205 /* Locale settings, either from the actual locale or
1206 from a hard-code pseudo-locale */
Victor Stinner02e6bf72018-11-20 16:20:16 +01001207 LocaleInfo locale = LocaleInfo_STATIC_INIT;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001208
Victor Stinner2f084ec2013-06-23 14:54:30 +02001209 if (format->precision > INT_MAX) {
1210 PyErr_SetString(PyExc_ValueError, "precision too big");
1211 goto done;
1212 }
1213 precision = (int)format->precision;
1214
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001215 /* Zero padding is not allowed. */
1216 if (format->fill_char == '0') {
1217 PyErr_SetString(PyExc_ValueError,
1218 "Zero padding is not allowed in complex format "
1219 "specifier");
1220 goto done;
1221 }
1222
1223 /* Neither is '=' alignment . */
1224 if (format->align == '=') {
1225 PyErr_SetString(PyExc_ValueError,
1226 "'=' alignment flag is not allowed in complex format "
1227 "specifier");
1228 goto done;
1229 }
1230
1231 re = PyComplex_RealAsDouble(value);
1232 if (re == -1.0 && PyErr_Occurred())
1233 goto done;
1234 im = PyComplex_ImagAsDouble(value);
1235 if (im == -1.0 && PyErr_Occurred())
1236 goto done;
1237
1238 if (format->alternate)
1239 flags |= Py_DTSF_ALT;
1240
1241 if (type == '\0') {
1242 /* Omitted type specifier. Should be like str(self). */
1243 type = 'r';
1244 default_precision = 0;
1245 if (re == 0.0 && copysign(1.0, re) == 1.0)
1246 skip_re = 1;
1247 else
1248 add_parens = 1;
1249 }
1250
1251 if (type == 'n')
1252 /* 'n' is the same as 'g', except for the locale used to
1253 format the result. We take care of that later. */
1254 type = 'g';
1255
1256 if (precision < 0)
1257 precision = default_precision;
1258 else if (type == 'r')
1259 type = 'g';
1260
Martin Panter4c359642016-05-08 13:53:41 +00001261 /* Cast "type", because if we're in unicode we need to pass an
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001262 8-bit char. This is safe, because we've restricted what "type"
1263 can be. */
1264 re_buf = PyOS_double_to_string(re, (char)type, precision, flags,
1265 &re_float_type);
1266 if (re_buf == NULL)
1267 goto done;
1268 im_buf = PyOS_double_to_string(im, (char)type, precision, flags,
1269 &im_float_type);
1270 if (im_buf == NULL)
1271 goto done;
1272
1273 n_re_digits = strlen(re_buf);
1274 n_im_digits = strlen(im_buf);
1275
1276 /* Since there is no unicode version of PyOS_double_to_string,
1277 just use the 8 bit version and then convert to unicode. */
Victor Stinnerd3f08822012-05-29 12:57:52 +02001278 re_unicode_tmp = _PyUnicode_FromASCII(re_buf, n_re_digits);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001279 if (re_unicode_tmp == NULL)
1280 goto done;
1281 i_re = 0;
1282
Victor Stinnerd3f08822012-05-29 12:57:52 +02001283 im_unicode_tmp = _PyUnicode_FromASCII(im_buf, n_im_digits);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001284 if (im_unicode_tmp == NULL)
1285 goto done;
1286 i_im = 0;
1287
1288 /* Is a sign character present in the output? If so, remember it
1289 and skip it */
1290 if (PyUnicode_READ_CHAR(re_unicode_tmp, i_re) == '-') {
1291 re_sign_char = '-';
1292 ++i_re;
1293 --n_re_digits;
1294 }
1295 if (PyUnicode_READ_CHAR(im_unicode_tmp, i_im) == '-') {
1296 im_sign_char = '-';
1297 ++i_im;
1298 --n_im_digits;
1299 }
1300
1301 /* Determine if we have any "remainder" (after the digits, might include
1302 decimal or exponent or both (or neither)) */
Victor Stinnerafbaa202011-09-28 21:50:16 +02001303 parse_number(re_unicode_tmp, i_re, i_re + n_re_digits,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001304 &n_re_remainder, &re_has_decimal);
Victor Stinnerafbaa202011-09-28 21:50:16 +02001305 parse_number(im_unicode_tmp, i_im, i_im + n_im_digits,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001306 &n_im_remainder, &im_has_decimal);
1307
1308 /* Determine the grouping, separator, and decimal point, if any. */
Victor Stinner41a863c2012-02-24 00:37:51 +01001309 if (get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
Eric V. Smith89e1b1a2016-09-09 23:06:47 -04001310 format->thousands_separators,
Victor Stinner41a863c2012-02-24 00:37:51 +01001311 &locale) == -1)
1312 goto done;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001313
1314 /* Turn off any padding. We'll do it later after we've composed
1315 the numbers without padding. */
1316 tmp_format.fill_char = '\0';
1317 tmp_format.align = '<';
1318 tmp_format.width = -1;
1319
1320 /* Calculate how much memory we'll need. */
Andy Lesterad0c7752020-03-07 11:29:10 -06001321 n_re_total = calc_number_widths(&re_spec, 0, re_sign_char,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001322 i_re, i_re + n_re_digits, n_re_remainder,
Victor Stinner41a863c2012-02-24 00:37:51 +01001323 re_has_decimal, &locale, &tmp_format,
1324 &maxchar);
Victor Stinner59423e32018-11-26 13:40:01 +01001325 if (n_re_total == -1) {
1326 goto done;
1327 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001328
1329 /* Same formatting, but always include a sign, unless the real part is
1330 * going to be omitted, in which case we use whatever sign convention was
1331 * requested by the original format. */
1332 if (!skip_re)
1333 tmp_format.sign = '+';
Andy Lesterad0c7752020-03-07 11:29:10 -06001334 n_im_total = calc_number_widths(&im_spec, 0, im_sign_char,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001335 i_im, i_im + n_im_digits, n_im_remainder,
Victor Stinner41a863c2012-02-24 00:37:51 +01001336 im_has_decimal, &locale, &tmp_format,
1337 &maxchar);
Victor Stinner59423e32018-11-26 13:40:01 +01001338 if (n_im_total == -1) {
1339 goto done;
1340 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001341
1342 if (skip_re)
1343 n_re_total = 0;
1344
1345 /* Add 1 for the 'j', and optionally 2 for parens. */
1346 calc_padding(n_re_total + n_im_total + 1 + add_parens * 2,
1347 format->width, format->align, &lpad, &rpad, &total);
1348
Victor Stinner41a863c2012-02-24 00:37:51 +01001349 if (lpad || rpad)
Victor Stinnera4ac6002012-01-21 15:50:49 +01001350 maxchar = Py_MAX(maxchar, format->fill_char);
1351
Victor Stinnerd3f08822012-05-29 12:57:52 +02001352 if (_PyUnicodeWriter_Prepare(writer, total, maxchar) == -1)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001353 goto done;
Victor Stinnerd3f08822012-05-29 12:57:52 +02001354 rkind = writer->kind;
1355 rdata = writer->data;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001356
1357 /* Populate the memory. First, the padding. */
Victor Stinnerd3f08822012-05-29 12:57:52 +02001358 result = fill_padding(writer,
1359 n_re_total + n_im_total + 1 + add_parens * 2,
Eric V. Smith2ea97122014-04-14 11:55:10 -04001360 format->fill_char, lpad, rpad);
Victor Stinnerd3f08822012-05-29 12:57:52 +02001361 if (result == -1)
1362 goto done;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001363
Victor Stinnerd3f08822012-05-29 12:57:52 +02001364 if (add_parens) {
1365 PyUnicode_WRITE(rkind, rdata, writer->pos, '(');
1366 writer->pos++;
1367 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001368
1369 if (!skip_re) {
Victor Stinnerd3f08822012-05-29 12:57:52 +02001370 result = fill_number(writer, &re_spec,
Andy Lesterad0c7752020-03-07 11:29:10 -06001371 re_unicode_tmp, i_re,
Victor Stinnerd3f08822012-05-29 12:57:52 +02001372 NULL, 0,
1373 0,
1374 &locale, 0);
1375 if (result == -1)
Victor Stinnerafbaa202011-09-28 21:50:16 +02001376 goto done;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001377 }
Victor Stinnerd3f08822012-05-29 12:57:52 +02001378 result = fill_number(writer, &im_spec,
Andy Lesterad0c7752020-03-07 11:29:10 -06001379 im_unicode_tmp, i_im,
Victor Stinnerd3f08822012-05-29 12:57:52 +02001380 NULL, 0,
1381 0,
1382 &locale, 0);
1383 if (result == -1)
Victor Stinnerafbaa202011-09-28 21:50:16 +02001384 goto done;
Victor Stinnerd3f08822012-05-29 12:57:52 +02001385 PyUnicode_WRITE(rkind, rdata, writer->pos, 'j');
1386 writer->pos++;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001387
Victor Stinnerd3f08822012-05-29 12:57:52 +02001388 if (add_parens) {
1389 PyUnicode_WRITE(rkind, rdata, writer->pos, ')');
1390 writer->pos++;
1391 }
1392
1393 writer->pos += rpad;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001394
1395done:
1396 PyMem_Free(re_buf);
1397 PyMem_Free(im_buf);
1398 Py_XDECREF(re_unicode_tmp);
1399 Py_XDECREF(im_unicode_tmp);
Victor Stinner41a863c2012-02-24 00:37:51 +01001400 free_locale_info(&locale);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001401 return result;
1402}
1403
1404/************************************************************************/
1405/*********** built in formatters ****************************************/
1406/************************************************************************/
doko@ubuntu.com39378f72012-06-21 12:12:20 +02001407static int
Victor Stinnerd3f08822012-05-29 12:57:52 +02001408format_obj(PyObject *obj, _PyUnicodeWriter *writer)
1409{
1410 PyObject *str;
1411 int err;
1412
1413 str = PyObject_Str(obj);
1414 if (str == NULL)
1415 return -1;
1416 err = _PyUnicodeWriter_WriteStr(writer, str);
1417 Py_DECREF(str);
1418 return err;
1419}
1420
1421int
1422_PyUnicode_FormatAdvancedWriter(_PyUnicodeWriter *writer,
1423 PyObject *obj,
1424 PyObject *format_spec,
1425 Py_ssize_t start, Py_ssize_t end)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001426{
1427 InternalFormatSpec format;
Victor Stinnerd3f08822012-05-29 12:57:52 +02001428
1429 assert(PyUnicode_Check(obj));
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001430
1431 /* check for the special case of zero length format spec, make
1432 it equivalent to str(obj) */
Victor Stinnerd3f08822012-05-29 12:57:52 +02001433 if (start == end) {
1434 if (PyUnicode_CheckExact(obj))
1435 return _PyUnicodeWriter_WriteStr(writer, obj);
1436 else
1437 return format_obj(obj, writer);
1438 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001439
1440 /* parse the format_spec */
1441 if (!parse_internal_render_format_spec(format_spec, start, end,
1442 &format, 's', '<'))
Victor Stinnerd3f08822012-05-29 12:57:52 +02001443 return -1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001444
1445 /* type conversion? */
1446 switch (format.type) {
1447 case 's':
1448 /* no type conversion needed, already a string. do the formatting */
Victor Stinnerd3f08822012-05-29 12:57:52 +02001449 return format_string_internal(obj, &format, writer);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001450 default:
1451 /* unknown */
Victor Stinnera102ed72020-02-07 02:24:48 +01001452 unknown_presentation_type(format.type, Py_TYPE(obj)->tp_name);
Victor Stinnerd3f08822012-05-29 12:57:52 +02001453 return -1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001454 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001455}
1456
Victor Stinnerd3f08822012-05-29 12:57:52 +02001457int
1458_PyLong_FormatAdvancedWriter(_PyUnicodeWriter *writer,
1459 PyObject *obj,
1460 PyObject *format_spec,
1461 Py_ssize_t start, Py_ssize_t end)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001462{
Andy Lester3fe91172020-03-01 15:26:43 -06001463 PyObject *tmp = NULL;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001464 InternalFormatSpec format;
Victor Stinnerd3f08822012-05-29 12:57:52 +02001465 int result = -1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001466
1467 /* check for the special case of zero length format spec, make
1468 it equivalent to str(obj) */
1469 if (start == end) {
Victor Stinnerd3f08822012-05-29 12:57:52 +02001470 if (PyLong_CheckExact(obj))
1471 return _PyLong_FormatWriter(writer, obj, 10, 0);
1472 else
1473 return format_obj(obj, writer);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001474 }
1475
1476 /* parse the format_spec */
1477 if (!parse_internal_render_format_spec(format_spec, start, end,
1478 &format, 'd', '>'))
1479 goto done;
1480
1481 /* type conversion? */
1482 switch (format.type) {
1483 case 'b':
1484 case 'c':
1485 case 'd':
1486 case 'o':
1487 case 'x':
1488 case 'X':
1489 case 'n':
Serhiy Storchaka95949422013-08-27 19:40:23 +03001490 /* no type conversion needed, already an int. do the formatting */
Victor Stinnerd3f08822012-05-29 12:57:52 +02001491 result = format_long_internal(obj, &format, writer);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001492 break;
1493
1494 case 'e':
1495 case 'E':
1496 case 'f':
1497 case 'F':
1498 case 'g':
1499 case 'G':
1500 case '%':
1501 /* convert to float */
1502 tmp = PyNumber_Float(obj);
1503 if (tmp == NULL)
1504 goto done;
Victor Stinnerd3f08822012-05-29 12:57:52 +02001505 result = format_float_internal(tmp, &format, writer);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001506 break;
1507
1508 default:
1509 /* unknown */
Victor Stinnera102ed72020-02-07 02:24:48 +01001510 unknown_presentation_type(format.type, Py_TYPE(obj)->tp_name);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001511 goto done;
1512 }
1513
1514done:
1515 Py_XDECREF(tmp);
1516 return result;
1517}
1518
Victor Stinnerd3f08822012-05-29 12:57:52 +02001519int
1520_PyFloat_FormatAdvancedWriter(_PyUnicodeWriter *writer,
1521 PyObject *obj,
1522 PyObject *format_spec,
1523 Py_ssize_t start, Py_ssize_t end)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001524{
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001525 InternalFormatSpec format;
1526
1527 /* check for the special case of zero length format spec, make
1528 it equivalent to str(obj) */
Victor Stinnerd3f08822012-05-29 12:57:52 +02001529 if (start == end)
1530 return format_obj(obj, writer);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001531
1532 /* parse the format_spec */
1533 if (!parse_internal_render_format_spec(format_spec, start, end,
1534 &format, '\0', '>'))
Victor Stinnerd3f08822012-05-29 12:57:52 +02001535 return -1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001536
1537 /* type conversion? */
1538 switch (format.type) {
1539 case '\0': /* No format code: like 'g', but with at least one decimal. */
1540 case 'e':
1541 case 'E':
1542 case 'f':
1543 case 'F':
1544 case 'g':
1545 case 'G':
1546 case 'n':
1547 case '%':
1548 /* no conversion, already a float. do the formatting */
Victor Stinnerd3f08822012-05-29 12:57:52 +02001549 return format_float_internal(obj, &format, writer);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001550
1551 default:
1552 /* unknown */
Victor Stinnera102ed72020-02-07 02:24:48 +01001553 unknown_presentation_type(format.type, Py_TYPE(obj)->tp_name);
Victor Stinnerd3f08822012-05-29 12:57:52 +02001554 return -1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001555 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001556}
1557
Victor Stinnerd3f08822012-05-29 12:57:52 +02001558int
1559_PyComplex_FormatAdvancedWriter(_PyUnicodeWriter *writer,
1560 PyObject *obj,
1561 PyObject *format_spec,
1562 Py_ssize_t start, Py_ssize_t end)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001563{
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001564 InternalFormatSpec format;
1565
1566 /* check for the special case of zero length format spec, make
1567 it equivalent to str(obj) */
Victor Stinnerd3f08822012-05-29 12:57:52 +02001568 if (start == end)
1569 return format_obj(obj, writer);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001570
1571 /* parse the format_spec */
1572 if (!parse_internal_render_format_spec(format_spec, start, end,
1573 &format, '\0', '>'))
Victor Stinnerd3f08822012-05-29 12:57:52 +02001574 return -1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001575
1576 /* type conversion? */
1577 switch (format.type) {
1578 case '\0': /* No format code: like 'g', but with at least one decimal. */
1579 case 'e':
1580 case 'E':
1581 case 'f':
1582 case 'F':
1583 case 'g':
1584 case 'G':
1585 case 'n':
1586 /* no conversion, already a complex. do the formatting */
Victor Stinnerd3f08822012-05-29 12:57:52 +02001587 return format_complex_internal(obj, &format, writer);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001588
1589 default:
1590 /* unknown */
Victor Stinnera102ed72020-02-07 02:24:48 +01001591 unknown_presentation_type(format.type, Py_TYPE(obj)->tp_name);
Victor Stinnerd3f08822012-05-29 12:57:52 +02001592 return -1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001593 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001594}