blob: 7b5a7bd04eb3aafe70bfe0d21e184422b38a87d6 [file] [log] [blame]
/* implements the unicode (as opposed to string) version of the
   built-in formatters for string, int, float.  that is, the versions
   of int.__format__, etc., that take and return unicode objects */
4
5#include "Python.h"
Victor Stinner02e6bf72018-11-20 16:20:16 +01006#include "pycore_fileutils.h"
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02007#include <locale.h>
8
9/* Raises an exception about an unknown presentation type for this
10 * type. */
11
12static void
13unknown_presentation_type(Py_UCS4 presentation_type,
14 const char* type_name)
15{
16 /* %c might be out-of-range, hence the two cases. */
17 if (presentation_type > 32 && presentation_type < 128)
18 PyErr_Format(PyExc_ValueError,
19 "Unknown format code '%c' "
20 "for object of type '%.200s'",
21 (char)presentation_type,
22 type_name);
23 else
24 PyErr_Format(PyExc_ValueError,
25 "Unknown format code '\\x%x' "
26 "for object of type '%.200s'",
27 (unsigned int)presentation_type,
28 type_name);
29}
30
31static void
Benjamin Petersoncbda8fc2018-10-01 21:54:39 -070032invalid_thousands_separator_type(char specifier, Py_UCS4 presentation_type)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020033{
Benjamin Petersoncbda8fc2018-10-01 21:54:39 -070034 assert(specifier == ',' || specifier == '_');
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020035 if (presentation_type > 32 && presentation_type < 128)
36 PyErr_Format(PyExc_ValueError,
Benjamin Petersoncbda8fc2018-10-01 21:54:39 -070037 "Cannot specify '%c' with '%c'.",
38 specifier, (char)presentation_type);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020039 else
40 PyErr_Format(PyExc_ValueError,
Benjamin Petersoncbda8fc2018-10-01 21:54:39 -070041 "Cannot specify '%c' with '\\x%x'.",
42 specifier, (unsigned int)presentation_type);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020043}
44
Eric V. Smith89e1b1a2016-09-09 23:06:47 -040045static void
Benjamin Petersoneb0dfa92016-09-09 20:14:05 -070046invalid_comma_and_underscore(void)
Eric V. Smith89e1b1a2016-09-09 23:06:47 -040047{
48 PyErr_Format(PyExc_ValueError, "Cannot specify both ',' and '_'.");
49}
50
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020051/*
52 get_integer consumes 0 or more decimal digit characters from an
53 input string, updates *result with the corresponding positive
54 integer, and returns the number of digits consumed.
55
56 returns -1 on error.
57*/
58static int
Serhiy Storchaka1f932612016-08-29 15:57:26 +030059get_integer(PyObject *str, Py_ssize_t *ppos, Py_ssize_t end,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020060 Py_ssize_t *result)
61{
Serhiy Storchaka1f932612016-08-29 15:57:26 +030062 Py_ssize_t accumulator, digitval, pos = *ppos;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020063 int numdigits;
Serhiy Storchaka1f932612016-08-29 15:57:26 +030064 int kind = PyUnicode_KIND(str);
Serhiy Storchakacd8295f2020-04-11 10:48:40 +030065 const void *data = PyUnicode_DATA(str);
Serhiy Storchaka1f932612016-08-29 15:57:26 +030066
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020067 accumulator = numdigits = 0;
Serhiy Storchaka1f932612016-08-29 15:57:26 +030068 for (; pos < end; pos++, numdigits++) {
69 digitval = Py_UNICODE_TODECIMAL(PyUnicode_READ(kind, data, pos));
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020070 if (digitval < 0)
71 break;
72 /*
Mark Dickinson47862d42011-12-01 15:27:04 +000073 Detect possible overflow before it happens:
74
75 accumulator * 10 + digitval > PY_SSIZE_T_MAX if and only if
76 accumulator > (PY_SSIZE_T_MAX - digitval) / 10.
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020077 */
Mark Dickinson47862d42011-12-01 15:27:04 +000078 if (accumulator > (PY_SSIZE_T_MAX - digitval) / 10) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020079 PyErr_Format(PyExc_ValueError,
80 "Too many decimal digits in format string");
Serhiy Storchaka1f932612016-08-29 15:57:26 +030081 *ppos = pos;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020082 return -1;
83 }
Mark Dickinson47862d42011-12-01 15:27:04 +000084 accumulator = accumulator * 10 + digitval;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020085 }
Serhiy Storchaka1f932612016-08-29 15:57:26 +030086 *ppos = pos;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020087 *result = accumulator;
88 return numdigits;
89}
90
91/************************************************************************/
92/*********** standard format specifier parsing **************************/
93/************************************************************************/
94
95/* returns true if this character is a specifier alignment token */
96Py_LOCAL_INLINE(int)
97is_alignment_token(Py_UCS4 c)
98{
99 switch (c) {
100 case '<': case '>': case '=': case '^':
101 return 1;
102 default:
103 return 0;
104 }
105}
106
107/* returns true if this character is a sign element */
108Py_LOCAL_INLINE(int)
109is_sign_element(Py_UCS4 c)
110{
111 switch (c) {
112 case ' ': case '+': case '-':
113 return 1;
114 default:
115 return 0;
116 }
117}
Eric Smith8c663262007-08-25 02:26:07 +0000118
/* Locale type codes: which decimal point / thousands separator /
   grouping scheme get_locale_info() should produce.

   LT_NO_LOCALE must be zero, because the format spec parser tests the
   field holding one of these codes as a plain truth value. */
enum LocaleType {
    /* No thousands separator at all. */
    LT_NO_LOCALE = 0,
    /* ',' separator.  The code is the separator character itself. */
    LT_DEFAULT_LOCALE = ',',
    /* '_' separator.  The code is the separator character itself. */
    LT_UNDERSCORE_LOCALE = '_',
    /* '_' separator with groups of four; the parser substitutes this
       for LT_UNDERSCORE_LOCALE on the 'b', 'o', 'x' and 'X' types. */
    LT_UNDER_FOUR_LOCALE,
    /* Separators and grouping taken from localeconv(). */
    LT_CURRENT_LOCALE
};
Eric Smith4a7d76d2008-05-30 18:10:19 +0000127
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200128typedef struct {
129 Py_UCS4 fill_char;
130 Py_UCS4 align;
131 int alternate;
132 Py_UCS4 sign;
133 Py_ssize_t width;
Benjamin Peterson995026a2016-09-13 22:46:15 -0700134 enum LocaleType thousands_separators;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200135 Py_ssize_t precision;
136 Py_UCS4 type;
137} InternalFormatSpec;
Eric Smith4a7d76d2008-05-30 18:10:19 +0000138
#if 0
/* Occasionally useful for debugging: print every field of a parsed
   format spec to stdout, one per line, followed by a blank line.
   Should normally stay compiled out. */
static void
DEBUG_PRINT_FORMAT_SPEC(InternalFormatSpec *format)
{
    printf("internal format spec: fill_char %d\n"
           "internal format spec: align %d\n"
           "internal format spec: alternate %d\n"
           "internal format spec: sign %d\n"
           "internal format spec: width %zd\n"
           "internal format spec: thousands_separators %d\n"
           "internal format spec: precision %zd\n"
           "internal format spec: type %c\n"
           "\n",
           format->fill_char,
           format->align,
           format->alternate,
           format->sign,
           format->width,
           format->thousands_separators,
           format->precision,
           format->type);
}
#endif
156
157
158/*
159 ptr points to the start of the format_spec, end points just past its end.
160 fills in format with the parsed information.
161 returns 1 on success, 0 on failure.
162 if failure, sets the exception
163*/
164static int
165parse_internal_render_format_spec(PyObject *format_spec,
166 Py_ssize_t start, Py_ssize_t end,
167 InternalFormatSpec *format,
168 char default_type,
169 char default_align)
170{
171 Py_ssize_t pos = start;
Serhiy Storchaka1f932612016-08-29 15:57:26 +0300172 int kind = PyUnicode_KIND(format_spec);
Serhiy Storchakacd8295f2020-04-11 10:48:40 +0300173 const void *data = PyUnicode_DATA(format_spec);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200174 /* end-pos is used throughout this code to specify the length of
175 the input string */
Serhiy Storchaka1f932612016-08-29 15:57:26 +0300176#define READ_spec(index) PyUnicode_READ(kind, data, index)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200177
178 Py_ssize_t consumed;
179 int align_specified = 0;
Eric V. Smith2ea97122014-04-14 11:55:10 -0400180 int fill_char_specified = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200181
Eric V. Smith2ea97122014-04-14 11:55:10 -0400182 format->fill_char = ' ';
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200183 format->align = default_align;
184 format->alternate = 0;
185 format->sign = '\0';
186 format->width = -1;
Benjamin Peterson995026a2016-09-13 22:46:15 -0700187 format->thousands_separators = LT_NO_LOCALE;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200188 format->precision = -1;
189 format->type = default_type;
190
191 /* If the second char is an alignment token,
192 then parse the fill char */
193 if (end-pos >= 2 && is_alignment_token(READ_spec(pos+1))) {
194 format->align = READ_spec(pos+1);
195 format->fill_char = READ_spec(pos);
Eric V. Smith2ea97122014-04-14 11:55:10 -0400196 fill_char_specified = 1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200197 align_specified = 1;
198 pos += 2;
199 }
200 else if (end-pos >= 1 && is_alignment_token(READ_spec(pos))) {
201 format->align = READ_spec(pos);
202 align_specified = 1;
203 ++pos;
204 }
205
206 /* Parse the various sign options */
207 if (end-pos >= 1 && is_sign_element(READ_spec(pos))) {
208 format->sign = READ_spec(pos);
209 ++pos;
210 }
211
212 /* If the next character is #, we're in alternate mode. This only
213 applies to integers. */
214 if (end-pos >= 1 && READ_spec(pos) == '#') {
215 format->alternate = 1;
216 ++pos;
217 }
218
219 /* The special case for 0-padding (backwards compat) */
Eric V. Smith2ea97122014-04-14 11:55:10 -0400220 if (!fill_char_specified && end-pos >= 1 && READ_spec(pos) == '0') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200221 format->fill_char = '0';
Serhiy Storchakacf19cc32021-01-25 11:56:33 +0200222 if (!align_specified && default_align == '>') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200223 format->align = '=';
224 }
225 ++pos;
226 }
227
228 consumed = get_integer(format_spec, &pos, end, &format->width);
229 if (consumed == -1)
230 /* Overflow error. Exception already set. */
231 return 0;
232
233 /* If consumed is 0, we didn't consume any characters for the
234 width. In that case, reset the width to -1, because
235 get_integer() will have set it to zero. -1 is how we record
236 that the width wasn't specified. */
237 if (consumed == 0)
238 format->width = -1;
239
240 /* Comma signifies add thousands separators */
241 if (end-pos && READ_spec(pos) == ',') {
Eric V. Smith89e1b1a2016-09-09 23:06:47 -0400242 format->thousands_separators = LT_DEFAULT_LOCALE;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200243 ++pos;
244 }
Eric V. Smith89e1b1a2016-09-09 23:06:47 -0400245 /* Underscore signifies add thousands separators */
246 if (end-pos && READ_spec(pos) == '_') {
Benjamin Peterson995026a2016-09-13 22:46:15 -0700247 if (format->thousands_separators != LT_NO_LOCALE) {
Eric V. Smith89e1b1a2016-09-09 23:06:47 -0400248 invalid_comma_and_underscore();
249 return 0;
250 }
251 format->thousands_separators = LT_UNDERSCORE_LOCALE;
252 ++pos;
253 }
254 if (end-pos && READ_spec(pos) == ',') {
han-solo0d6aa7f2020-09-01 10:34:29 -0400255 if (format->thousands_separators == LT_UNDERSCORE_LOCALE) {
256 invalid_comma_and_underscore();
257 return 0;
258 }
Eric V. Smith89e1b1a2016-09-09 23:06:47 -0400259 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200260
261 /* Parse field precision */
262 if (end-pos && READ_spec(pos) == '.') {
263 ++pos;
264
265 consumed = get_integer(format_spec, &pos, end, &format->precision);
266 if (consumed == -1)
267 /* Overflow error. Exception already set. */
268 return 0;
269
270 /* Not having a precision after a dot is an error. */
271 if (consumed == 0) {
272 PyErr_Format(PyExc_ValueError,
273 "Format specifier missing precision");
274 return 0;
275 }
276
277 }
278
279 /* Finally, parse the type field. */
280
281 if (end-pos > 1) {
Eric V. Smithd25cfe62012-01-19 20:04:28 -0500282 /* More than one char remain, invalid format specifier. */
283 PyErr_Format(PyExc_ValueError, "Invalid format specifier");
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200284 return 0;
285 }
286
287 if (end-pos == 1) {
288 format->type = READ_spec(pos);
289 ++pos;
290 }
291
292 /* Do as much validating as we can, just by looking at the format
293 specifier. Do not take into account what type of formatting
294 we're doing (int, float, string). */
295
296 if (format->thousands_separators) {
297 switch (format->type) {
298 case 'd':
299 case 'e':
300 case 'f':
301 case 'g':
302 case 'E':
303 case 'G':
304 case '%':
305 case 'F':
306 case '\0':
307 /* These are allowed. See PEP 378.*/
308 break;
Eric V. Smith89e1b1a2016-09-09 23:06:47 -0400309 case 'b':
310 case 'o':
311 case 'x':
312 case 'X':
313 /* Underscores are allowed in bin/oct/hex. See PEP 515. */
314 if (format->thousands_separators == LT_UNDERSCORE_LOCALE) {
315 /* Every four digits, not every three, in bin/oct/hex. */
316 format->thousands_separators = LT_UNDER_FOUR_LOCALE;
317 break;
318 }
Stefan Krahf432a322017-08-21 13:09:59 +0200319 /* fall through */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200320 default:
Benjamin Petersoncbda8fc2018-10-01 21:54:39 -0700321 invalid_thousands_separator_type(format->thousands_separators, format->type);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200322 return 0;
323 }
324 }
325
Victor Stinnera4ac6002012-01-21 15:50:49 +0100326 assert (format->align <= 127);
327 assert (format->sign <= 127);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200328 return 1;
329}
330
331/* Calculate the padding needed. */
332static void
333calc_padding(Py_ssize_t nchars, Py_ssize_t width, Py_UCS4 align,
334 Py_ssize_t *n_lpadding, Py_ssize_t *n_rpadding,
335 Py_ssize_t *n_total)
336{
337 if (width >= 0) {
338 if (nchars > width)
339 *n_total = nchars;
340 else
341 *n_total = width;
342 }
343 else {
344 /* not specified, use all of the chars and no more */
345 *n_total = nchars;
346 }
347
348 /* Figure out how much leading space we need, based on the
349 aligning */
350 if (align == '>')
351 *n_lpadding = *n_total - nchars;
352 else if (align == '^')
353 *n_lpadding = (*n_total - nchars) / 2;
354 else if (align == '<' || align == '=')
355 *n_lpadding = 0;
356 else {
357 /* We should never have an unspecified alignment. */
Barry Warsawb2e57942017-09-14 18:13:16 -0700358 Py_UNREACHABLE();
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200359 }
360
361 *n_rpadding = *n_total - nchars - *n_lpadding;
362}
363
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200364/* Do the padding, and return a pointer to where the caller-supplied
365 content goes. */
Victor Stinner9ce59bb2013-05-17 00:04:56 +0200366static int
Victor Stinnerd3f08822012-05-29 12:57:52 +0200367fill_padding(_PyUnicodeWriter *writer,
368 Py_ssize_t nchars,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200369 Py_UCS4 fill_char, Py_ssize_t n_lpadding,
370 Py_ssize_t n_rpadding)
371{
Victor Stinnerd3f08822012-05-29 12:57:52 +0200372 Py_ssize_t pos;
373
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200374 /* Pad on left. */
Victor Stinnerd3f08822012-05-29 12:57:52 +0200375 if (n_lpadding) {
376 pos = writer->pos;
377 _PyUnicode_FastFill(writer->buffer, pos, n_lpadding, fill_char);
378 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200379
380 /* Pad on right. */
Victor Stinnerd3f08822012-05-29 12:57:52 +0200381 if (n_rpadding) {
382 pos = writer->pos + nchars + n_lpadding;
383 _PyUnicode_FastFill(writer->buffer, pos, n_rpadding, fill_char);
384 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200385
386 /* Pointer to the user content. */
Victor Stinnerd3f08822012-05-29 12:57:52 +0200387 writer->pos += n_lpadding;
388 return 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200389}
390
391/************************************************************************/
392/*********** common routines for numeric formatting *********************/
393/************************************************************************/
394
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200395/* Locale info needed for formatting integers and the part of floats
396 before and including the decimal. Note that locales only support
397 8-bit chars, not unicode. */
398typedef struct {
Victor Stinner41a863c2012-02-24 00:37:51 +0100399 PyObject *decimal_point;
400 PyObject *thousands_sep;
401 const char *grouping;
Victor Stinner02e6bf72018-11-20 16:20:16 +0100402 char *grouping_buffer;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200403} LocaleInfo;
404
Victor Stinner02e6bf72018-11-20 16:20:16 +0100405#define LocaleInfo_STATIC_INIT {0, 0, 0, 0}
Victor Stinner41a863c2012-02-24 00:37:51 +0100406
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200407/* describes the layout for an integer, see the comment in
408 calc_number_widths() for details */
409typedef struct {
410 Py_ssize_t n_lpadding;
411 Py_ssize_t n_prefix;
412 Py_ssize_t n_spadding;
413 Py_ssize_t n_rpadding;
414 char sign;
415 Py_ssize_t n_sign; /* number of digits needed for sign (0/1) */
416 Py_ssize_t n_grouped_digits; /* Space taken up by the digits, including
417 any grouping chars. */
418 Py_ssize_t n_decimal; /* 0 if only an integer */
419 Py_ssize_t n_remainder; /* Digits in decimal and/or exponent part,
420 excluding the decimal itself, if
421 present. */
422
423 /* These 2 are not the widths of fields, but are needed by
424 STRINGLIB_GROUPING. */
425 Py_ssize_t n_digits; /* The number of digits before a decimal
426 or exponent. */
427 Py_ssize_t n_min_width; /* The min_width we used when we computed
428 the n_grouped_digits width. */
429} NumberFieldWidths;
430
431
432/* Given a number of the form:
433 digits[remainder]
434 where ptr points to the start and end points to the end, find where
435 the integer part ends. This could be a decimal, an exponent, both,
436 or neither.
437 If a decimal point is present, set *has_decimal and increment
438 remainder beyond it.
439 Results are undefined (but shouldn't crash) for improperly
440 formatted strings.
441*/
442static void
443parse_number(PyObject *s, Py_ssize_t pos, Py_ssize_t end,
444 Py_ssize_t *n_remainder, int *has_decimal)
445{
446 Py_ssize_t remainder;
Serhiy Storchaka1f932612016-08-29 15:57:26 +0300447 int kind = PyUnicode_KIND(s);
Serhiy Storchakacd8295f2020-04-11 10:48:40 +0300448 const void *data = PyUnicode_DATA(s);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200449
Serhiy Storchaka1f932612016-08-29 15:57:26 +0300450 while (pos<end && Py_ISDIGIT(PyUnicode_READ(kind, data, pos)))
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200451 ++pos;
452 remainder = pos;
453
454 /* Does remainder start with a decimal point? */
Serhiy Storchaka1f932612016-08-29 15:57:26 +0300455 *has_decimal = pos<end && PyUnicode_READ(kind, data, remainder) == '.';
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200456
457 /* Skip the decimal point. */
458 if (*has_decimal)
459 remainder++;
460
461 *n_remainder = end - remainder;
462}
463
464/* not all fields of format are used. for example, precision is
465 unused. should this take discrete params in order to be more clear
466 about what it does? or is passing a single format parameter easier
Victor Stinner59423e32018-11-26 13:40:01 +0100467 and more efficient enough to justify a little obfuscation?
468 Return -1 on error. */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200469static Py_ssize_t
470calc_number_widths(NumberFieldWidths *spec, Py_ssize_t n_prefix,
Andy Lesterad0c7752020-03-07 11:29:10 -0600471 Py_UCS4 sign_char, Py_ssize_t n_start,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200472 Py_ssize_t n_end, Py_ssize_t n_remainder,
473 int has_decimal, const LocaleInfo *locale,
Victor Stinner41a863c2012-02-24 00:37:51 +0100474 const InternalFormatSpec *format, Py_UCS4 *maxchar)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200475{
476 Py_ssize_t n_non_digit_non_padding;
477 Py_ssize_t n_padding;
478
479 spec->n_digits = n_end - n_start - n_remainder - (has_decimal?1:0);
480 spec->n_lpadding = 0;
481 spec->n_prefix = n_prefix;
Victor Stinner41a863c2012-02-24 00:37:51 +0100482 spec->n_decimal = has_decimal ? PyUnicode_GET_LENGTH(locale->decimal_point) : 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200483 spec->n_remainder = n_remainder;
484 spec->n_spadding = 0;
485 spec->n_rpadding = 0;
486 spec->sign = '\0';
487 spec->n_sign = 0;
488
489 /* the output will look like:
490 | |
491 | <lpadding> <sign> <prefix> <spadding> <grouped_digits> <decimal> <remainder> <rpadding> |
492 | |
493
494 sign is computed from format->sign and the actual
495 sign of the number
496
497 prefix is given (it's for the '0x' prefix)
498
499 digits is already known
500
501 the total width is either given, or computed from the
502 actual digits
503
504 only one of lpadding, spadding, and rpadding can be non-zero,
505 and it's calculated from the width and other fields
506 */
507
508 /* compute the various parts we're going to write */
509 switch (format->sign) {
510 case '+':
511 /* always put a + or - */
512 spec->n_sign = 1;
513 spec->sign = (sign_char == '-' ? '-' : '+');
514 break;
515 case ' ':
516 spec->n_sign = 1;
517 spec->sign = (sign_char == '-' ? '-' : ' ');
518 break;
519 default:
520 /* Not specified, or the default (-) */
521 if (sign_char == '-') {
522 spec->n_sign = 1;
523 spec->sign = '-';
524 }
525 }
526
527 /* The number of chars used for non-digits and non-padding. */
528 n_non_digit_non_padding = spec->n_sign + spec->n_prefix + spec->n_decimal +
529 spec->n_remainder;
530
531 /* min_width can go negative, that's okay. format->width == -1 means
532 we don't care. */
533 if (format->fill_char == '0' && format->align == '=')
534 spec->n_min_width = format->width - n_non_digit_non_padding;
535 else
536 spec->n_min_width = 0;
537
538 if (spec->n_digits == 0)
539 /* This case only occurs when using 'c' formatting, we need
540 to special case it because the grouping code always wants
541 to have at least one character. */
542 spec->n_grouped_digits = 0;
Victor Stinner41a863c2012-02-24 00:37:51 +0100543 else {
544 Py_UCS4 grouping_maxchar;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200545 spec->n_grouped_digits = _PyUnicode_InsertThousandsGrouping(
Victor Stinner41a863c2012-02-24 00:37:51 +0100546 NULL, 0,
Victor Stinner59423e32018-11-26 13:40:01 +0100547 NULL, 0, spec->n_digits,
548 spec->n_min_width,
Victor Stinner41a863c2012-02-24 00:37:51 +0100549 locale->grouping, locale->thousands_sep, &grouping_maxchar);
Victor Stinner59423e32018-11-26 13:40:01 +0100550 if (spec->n_grouped_digits == -1) {
551 return -1;
552 }
Victor Stinner41a863c2012-02-24 00:37:51 +0100553 *maxchar = Py_MAX(*maxchar, grouping_maxchar);
554 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200555
556 /* Given the desired width and the total of digit and non-digit
557 space we consume, see if we need any padding. format->width can
558 be negative (meaning no padding), but this code still works in
559 that case. */
560 n_padding = format->width -
561 (n_non_digit_non_padding + spec->n_grouped_digits);
562 if (n_padding > 0) {
563 /* Some padding is needed. Determine if it's left, space, or right. */
564 switch (format->align) {
565 case '<':
566 spec->n_rpadding = n_padding;
567 break;
568 case '^':
569 spec->n_lpadding = n_padding / 2;
570 spec->n_rpadding = n_padding - spec->n_lpadding;
571 break;
572 case '=':
573 spec->n_spadding = n_padding;
574 break;
575 case '>':
576 spec->n_lpadding = n_padding;
577 break;
578 default:
Serhiy Storchakaeebaa9b2020-03-09 20:49:52 +0200579 /* Shouldn't get here */
Barry Warsawb2e57942017-09-14 18:13:16 -0700580 Py_UNREACHABLE();
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200581 }
582 }
Victor Stinner41a863c2012-02-24 00:37:51 +0100583
584 if (spec->n_lpadding || spec->n_spadding || spec->n_rpadding)
585 *maxchar = Py_MAX(*maxchar, format->fill_char);
586
Victor Stinner90f50d42012-02-24 01:44:47 +0100587 if (spec->n_decimal)
588 *maxchar = Py_MAX(*maxchar, PyUnicode_MAX_CHAR_VALUE(locale->decimal_point));
589
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200590 return spec->n_lpadding + spec->n_sign + spec->n_prefix +
591 spec->n_spadding + spec->n_grouped_digits + spec->n_decimal +
592 spec->n_remainder + spec->n_rpadding;
593}
594
Hansraj Das7320ec02019-10-25 22:14:02 +0530595/* Fill in the digit parts of a number's string representation,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200596 as determined in calc_number_widths().
Victor Stinnerafbaa202011-09-28 21:50:16 +0200597 Return -1 on error, or 0 on success. */
598static int
Victor Stinnerd3f08822012-05-29 12:57:52 +0200599fill_number(_PyUnicodeWriter *writer, const NumberFieldWidths *spec,
Andy Lesterad0c7752020-03-07 11:29:10 -0600600 PyObject *digits, Py_ssize_t d_start,
Victor Stinnerafbaa202011-09-28 21:50:16 +0200601 PyObject *prefix, Py_ssize_t p_start,
602 Py_UCS4 fill_char,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200603 LocaleInfo *locale, int toupper)
604{
605 /* Used to keep track of digits, decimal, and remainder. */
606 Py_ssize_t d_pos = d_start;
Victor Stinner22c103b2013-05-07 23:50:03 +0200607 const unsigned int kind = writer->kind;
Victor Stinnerd3f08822012-05-29 12:57:52 +0200608 const void *data = writer->data;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200609 Py_ssize_t r;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200610
611 if (spec->n_lpadding) {
Victor Stinnerd3f08822012-05-29 12:57:52 +0200612 _PyUnicode_FastFill(writer->buffer,
613 writer->pos, spec->n_lpadding, fill_char);
614 writer->pos += spec->n_lpadding;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200615 }
616 if (spec->n_sign == 1) {
Victor Stinnerd3f08822012-05-29 12:57:52 +0200617 PyUnicode_WRITE(kind, data, writer->pos, spec->sign);
618 writer->pos++;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200619 }
620 if (spec->n_prefix) {
Victor Stinnerd3f08822012-05-29 12:57:52 +0200621 _PyUnicode_FastCopyCharacters(writer->buffer, writer->pos,
622 prefix, p_start,
623 spec->n_prefix);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200624 if (toupper) {
625 Py_ssize_t t;
Benjamin Peterson21e0da22012-01-11 21:00:42 -0500626 for (t = 0; t < spec->n_prefix; t++) {
Victor Stinnerd3f08822012-05-29 12:57:52 +0200627 Py_UCS4 c = PyUnicode_READ(kind, data, writer->pos + t);
Victor Stinnered277852012-02-01 00:22:23 +0100628 c = Py_TOUPPER(c);
Victor Stinnera4ac6002012-01-21 15:50:49 +0100629 assert (c <= 127);
Victor Stinnerd3f08822012-05-29 12:57:52 +0200630 PyUnicode_WRITE(kind, data, writer->pos + t, c);
Benjamin Peterson21e0da22012-01-11 21:00:42 -0500631 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200632 }
Victor Stinnerd3f08822012-05-29 12:57:52 +0200633 writer->pos += spec->n_prefix;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200634 }
635 if (spec->n_spadding) {
Victor Stinnerd3f08822012-05-29 12:57:52 +0200636 _PyUnicode_FastFill(writer->buffer,
637 writer->pos, spec->n_spadding, fill_char);
638 writer->pos += spec->n_spadding;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200639 }
640
641 /* Only for type 'c' special case, it has no digits. */
642 if (spec->n_digits != 0) {
643 /* Fill the digits with InsertThousandsGrouping. */
Victor Stinner90f50d42012-02-24 01:44:47 +0100644 r = _PyUnicode_InsertThousandsGrouping(
Victor Stinner59423e32018-11-26 13:40:01 +0100645 writer, spec->n_grouped_digits,
646 digits, d_pos, spec->n_digits,
647 spec->n_min_width,
Victor Stinner41a863c2012-02-24 00:37:51 +0100648 locale->grouping, locale->thousands_sep, NULL);
Victor Stinner90f50d42012-02-24 01:44:47 +0100649 if (r == -1)
650 return -1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200651 assert(r == spec->n_grouped_digits);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200652 d_pos += spec->n_digits;
653 }
654 if (toupper) {
655 Py_ssize_t t;
Benjamin Peterson21e0da22012-01-11 21:00:42 -0500656 for (t = 0; t < spec->n_grouped_digits; t++) {
Victor Stinnerd3f08822012-05-29 12:57:52 +0200657 Py_UCS4 c = PyUnicode_READ(kind, data, writer->pos + t);
Victor Stinnered277852012-02-01 00:22:23 +0100658 c = Py_TOUPPER(c);
Benjamin Peterson21e0da22012-01-11 21:00:42 -0500659 if (c > 127) {
660 PyErr_SetString(PyExc_SystemError, "non-ascii grouped digit");
661 return -1;
662 }
Victor Stinnerd3f08822012-05-29 12:57:52 +0200663 PyUnicode_WRITE(kind, data, writer->pos + t, c);
Benjamin Peterson21e0da22012-01-11 21:00:42 -0500664 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200665 }
Victor Stinnerd3f08822012-05-29 12:57:52 +0200666 writer->pos += spec->n_grouped_digits;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200667
668 if (spec->n_decimal) {
Victor Stinnerd3f08822012-05-29 12:57:52 +0200669 _PyUnicode_FastCopyCharacters(
670 writer->buffer, writer->pos,
671 locale->decimal_point, 0, spec->n_decimal);
672 writer->pos += spec->n_decimal;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200673 d_pos += 1;
674 }
675
676 if (spec->n_remainder) {
Victor Stinnerd3f08822012-05-29 12:57:52 +0200677 _PyUnicode_FastCopyCharacters(
678 writer->buffer, writer->pos,
679 digits, d_pos, spec->n_remainder);
680 writer->pos += spec->n_remainder;
Brett Cannon8a250fa2012-06-25 16:13:44 -0400681 /* d_pos += spec->n_remainder; */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200682 }
683
684 if (spec->n_rpadding) {
Victor Stinnerd3f08822012-05-29 12:57:52 +0200685 _PyUnicode_FastFill(writer->buffer,
686 writer->pos, spec->n_rpadding,
687 fill_char);
688 writer->pos += spec->n_rpadding;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200689 }
Victor Stinnerafbaa202011-09-28 21:50:16 +0200690 return 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200691}
692
Serhiy Storchaka2d06e842015-12-25 19:53:18 +0200693static const char no_grouping[1] = {CHAR_MAX};
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200694
695/* Find the decimal point character(s?), thousands_separator(s?), and
696 grouping description, either for the current locale if type is
Eric V. Smith89e1b1a2016-09-09 23:06:47 -0400697 LT_CURRENT_LOCALE, a hard-coded locale if LT_DEFAULT_LOCALE or
698 LT_UNDERSCORE_LOCALE/LT_UNDER_FOUR_LOCALE, or none if LT_NO_LOCALE. */
Victor Stinner41a863c2012-02-24 00:37:51 +0100699static int
Benjamin Peterson59e5e0d2016-09-13 22:43:45 -0700700get_locale_info(enum LocaleType type, LocaleInfo *locale_info)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200701{
702 switch (type) {
703 case LT_CURRENT_LOCALE: {
Victor Stinner02e6bf72018-11-20 16:20:16 +0100704 struct lconv *lc = localeconv();
705 if (_Py_GetLocaleconvNumeric(lc,
706 &locale_info->decimal_point,
707 &locale_info->thousands_sep) < 0) {
Victor Stinner41a863c2012-02-24 00:37:51 +0100708 return -1;
Victor Stinnercb064fc2018-01-15 15:58:02 +0100709 }
Victor Stinner02e6bf72018-11-20 16:20:16 +0100710
711 /* localeconv() grouping can become a dangling pointer or point
712 to a different string if another thread calls localeconv() during
713 the string formatting. Copy the string to avoid this risk. */
714 locale_info->grouping_buffer = _PyMem_Strdup(lc->grouping);
715 if (locale_info->grouping_buffer == NULL) {
716 PyErr_NoMemory();
717 return -1;
718 }
719 locale_info->grouping = locale_info->grouping_buffer;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200720 break;
721 }
722 case LT_DEFAULT_LOCALE:
Eric V. Smith89e1b1a2016-09-09 23:06:47 -0400723 case LT_UNDERSCORE_LOCALE:
724 case LT_UNDER_FOUR_LOCALE:
Victor Stinner41a863c2012-02-24 00:37:51 +0100725 locale_info->decimal_point = PyUnicode_FromOrdinal('.');
Eric V. Smith89e1b1a2016-09-09 23:06:47 -0400726 locale_info->thousands_sep = PyUnicode_FromOrdinal(
727 type == LT_DEFAULT_LOCALE ? ',' : '_');
Benjamin Peterson59e5e0d2016-09-13 22:43:45 -0700728 if (!locale_info->decimal_point || !locale_info->thousands_sep)
Victor Stinner41a863c2012-02-24 00:37:51 +0100729 return -1;
Eric V. Smith89e1b1a2016-09-09 23:06:47 -0400730 if (type != LT_UNDER_FOUR_LOCALE)
731 locale_info->grouping = "\3"; /* Group every 3 characters. The
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200732 (implicit) trailing 0 means repeat
733 infinitely. */
Eric V. Smith89e1b1a2016-09-09 23:06:47 -0400734 else
735 locale_info->grouping = "\4"; /* Bin/oct/hex group every four. */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200736 break;
737 case LT_NO_LOCALE:
Victor Stinner41a863c2012-02-24 00:37:51 +0100738 locale_info->decimal_point = PyUnicode_FromOrdinal('.');
739 locale_info->thousands_sep = PyUnicode_New(0, 0);
Benjamin Peterson59e5e0d2016-09-13 22:43:45 -0700740 if (!locale_info->decimal_point || !locale_info->thousands_sep)
Victor Stinner41a863c2012-02-24 00:37:51 +0100741 return -1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200742 locale_info->grouping = no_grouping;
743 break;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200744 }
Victor Stinner41a863c2012-02-24 00:37:51 +0100745 return 0;
746}
747
748static void
749free_locale_info(LocaleInfo *locale_info)
750{
751 Py_XDECREF(locale_info->decimal_point);
752 Py_XDECREF(locale_info->thousands_sep);
Victor Stinner02e6bf72018-11-20 16:20:16 +0100753 PyMem_Free(locale_info->grouping_buffer);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200754}
755
756/************************************************************************/
757/*********** string formatting ******************************************/
758/************************************************************************/
759
Victor Stinnerd3f08822012-05-29 12:57:52 +0200760static int
761format_string_internal(PyObject *value, const InternalFormatSpec *format,
762 _PyUnicodeWriter *writer)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200763{
764 Py_ssize_t lpad;
765 Py_ssize_t rpad;
766 Py_ssize_t total;
Victor Stinnerd3f08822012-05-29 12:57:52 +0200767 Py_ssize_t len;
768 int result = -1;
Victor Stinnerece58de2012-04-23 23:36:38 +0200769 Py_UCS4 maxchar;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200770
Victor Stinnerd3f08822012-05-29 12:57:52 +0200771 assert(PyUnicode_IS_READY(value));
772 len = PyUnicode_GET_LENGTH(value);
773
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200774 /* sign is not allowed on strings */
775 if (format->sign != '\0') {
Miss Islington (bot)2d780232021-05-13 14:24:49 -0700776 if (format->sign == ' ') {
777 PyErr_SetString(PyExc_ValueError,
778 "Space not allowed in string format specifier");
779 }
780 else {
781 PyErr_SetString(PyExc_ValueError,
782 "Sign not allowed in string format specifier");
783 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200784 goto done;
785 }
786
787 /* alternate is not allowed on strings */
788 if (format->alternate) {
789 PyErr_SetString(PyExc_ValueError,
790 "Alternate form (#) not allowed in string format "
791 "specifier");
792 goto done;
793 }
794
795 /* '=' alignment not allowed on strings */
796 if (format->align == '=') {
797 PyErr_SetString(PyExc_ValueError,
798 "'=' alignment not allowed "
799 "in string format specifier");
800 goto done;
801 }
802
Victor Stinner621ef3d2012-10-02 00:33:47 +0200803 if ((format->width == -1 || format->width <= len)
804 && (format->precision == -1 || format->precision >= len)) {
Victor Stinnerd3f08822012-05-29 12:57:52 +0200805 /* Fast path */
806 return _PyUnicodeWriter_WriteStr(writer, value);
807 }
808
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200809 /* if precision is specified, output no more that format.precision
810 characters */
811 if (format->precision >= 0 && len >= format->precision) {
812 len = format->precision;
813 }
814
815 calc_padding(len, format->width, format->align, &lpad, &rpad, &total);
816
Victor Stinnereb4b5ac2013-04-03 02:02:33 +0200817 maxchar = writer->maxchar;
Victor Stinnera4ac6002012-01-21 15:50:49 +0100818 if (lpad != 0 || rpad != 0)
819 maxchar = Py_MAX(maxchar, format->fill_char);
Victor Stinnereb4b5ac2013-04-03 02:02:33 +0200820 if (PyUnicode_MAX_CHAR_VALUE(value) > maxchar) {
821 Py_UCS4 valmaxchar = _PyUnicode_FindMaxChar(value, 0, len);
822 maxchar = Py_MAX(maxchar, valmaxchar);
823 }
Victor Stinnera4ac6002012-01-21 15:50:49 +0100824
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200825 /* allocate the resulting string */
Victor Stinnerd3f08822012-05-29 12:57:52 +0200826 if (_PyUnicodeWriter_Prepare(writer, total, maxchar) == -1)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200827 goto done;
828
829 /* Write into that space. First the padding. */
Eric V. Smith2ea97122014-04-14 11:55:10 -0400830 result = fill_padding(writer, len, format->fill_char, lpad, rpad);
Victor Stinnerd3f08822012-05-29 12:57:52 +0200831 if (result == -1)
832 goto done;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200833
834 /* Then the source string. */
Victor Stinnerc9d369f2012-06-16 02:22:37 +0200835 if (len) {
836 _PyUnicode_FastCopyCharacters(writer->buffer, writer->pos,
837 value, 0, len);
838 }
Victor Stinnerd3f08822012-05-29 12:57:52 +0200839 writer->pos += (len + rpad);
840 result = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200841
842done:
843 return result;
844}
845
846
847/************************************************************************/
848/*********** long formatting ********************************************/
849/************************************************************************/
850
Victor Stinnerd3f08822012-05-29 12:57:52 +0200851static int
852format_long_internal(PyObject *value, const InternalFormatSpec *format,
853 _PyUnicodeWriter *writer)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200854{
Victor Stinnerd3f08822012-05-29 12:57:52 +0200855 int result = -1;
Amaury Forgeot d'Arccd27df32012-01-23 22:42:19 +0100856 Py_UCS4 maxchar = 127;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200857 PyObject *tmp = NULL;
858 Py_ssize_t inumeric_chars;
859 Py_UCS4 sign_char = '\0';
860 Py_ssize_t n_digits; /* count of digits need from the computed
861 string */
862 Py_ssize_t n_remainder = 0; /* Used only for 'c' formatting, which
863 produces non-digits */
864 Py_ssize_t n_prefix = 0; /* Count of prefix chars, (e.g., '0x') */
865 Py_ssize_t n_total;
Victor Stinnered277852012-02-01 00:22:23 +0100866 Py_ssize_t prefix = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200867 NumberFieldWidths spec;
868 long x;
869
870 /* Locale settings, either from the actual locale or
871 from a hard-code pseudo-locale */
Victor Stinner02e6bf72018-11-20 16:20:16 +0100872 LocaleInfo locale = LocaleInfo_STATIC_INIT;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200873
874 /* no precision allowed on integers */
875 if (format->precision != -1) {
876 PyErr_SetString(PyExc_ValueError,
877 "Precision not allowed in integer format specifier");
878 goto done;
879 }
880
881 /* special case for character formatting */
882 if (format->type == 'c') {
883 /* error to specify a sign */
884 if (format->sign != '\0') {
885 PyErr_SetString(PyExc_ValueError,
886 "Sign not allowed with integer"
887 " format specifier 'c'");
888 goto done;
889 }
Eric V. Smitha12572f2014-04-15 22:37:55 -0400890 /* error to request alternate format */
891 if (format->alternate) {
892 PyErr_SetString(PyExc_ValueError,
893 "Alternate form (#) not allowed with integer"
894 " format specifier 'c'");
895 goto done;
896 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200897
898 /* taken from unicodeobject.c formatchar() */
899 /* Integer input truncated to a character */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200900 x = PyLong_AsLong(value);
901 if (x == -1 && PyErr_Occurred())
902 goto done;
903 if (x < 0 || x > 0x10ffff) {
904 PyErr_SetString(PyExc_OverflowError,
Victor Stinnera4ac6002012-01-21 15:50:49 +0100905 "%c arg not in range(0x110000)");
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200906 goto done;
907 }
908 tmp = PyUnicode_FromOrdinal(x);
909 inumeric_chars = 0;
910 n_digits = 1;
Amaury Forgeot d'Arc6d766fc2012-01-23 23:20:43 +0100911 maxchar = Py_MAX(maxchar, (Py_UCS4)x);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200912
913 /* As a sort-of hack, we tell calc_number_widths that we only
914 have "remainder" characters. calc_number_widths thinks
915 these are characters that don't get formatted, only copied
916 into the output string. We do this for 'c' formatting,
917 because the characters are likely to be non-digits. */
918 n_remainder = 1;
919 }
920 else {
921 int base;
922 int leading_chars_to_skip = 0; /* Number of characters added by
923 PyNumber_ToBase that we want to
924 skip over. */
925
926 /* Compute the base and how many characters will be added by
927 PyNumber_ToBase */
928 switch (format->type) {
929 case 'b':
930 base = 2;
931 leading_chars_to_skip = 2; /* 0b */
932 break;
933 case 'o':
934 base = 8;
935 leading_chars_to_skip = 2; /* 0o */
936 break;
937 case 'x':
938 case 'X':
939 base = 16;
940 leading_chars_to_skip = 2; /* 0x */
941 break;
942 default: /* shouldn't be needed, but stops a compiler warning */
943 case 'd':
944 case 'n':
945 base = 10;
946 break;
947 }
948
Victor Stinnerd3f08822012-05-29 12:57:52 +0200949 if (format->sign != '+' && format->sign != ' '
950 && format->width == -1
951 && format->type != 'X' && format->type != 'n'
952 && !format->thousands_separators
953 && PyLong_CheckExact(value))
954 {
955 /* Fast path */
956 return _PyLong_FormatWriter(writer, value, base, format->alternate);
957 }
958
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200959 /* The number of prefix chars is the same as the leading
960 chars to skip */
961 if (format->alternate)
962 n_prefix = leading_chars_to_skip;
963
964 /* Do the hard part, converting to a string in a given base */
Victor Stinnerd3f08822012-05-29 12:57:52 +0200965 tmp = _PyLong_Format(value, base);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200966 if (tmp == NULL || PyUnicode_READY(tmp) == -1)
967 goto done;
968
969 inumeric_chars = 0;
970 n_digits = PyUnicode_GET_LENGTH(tmp);
971
972 prefix = inumeric_chars;
973
974 /* Is a sign character present in the output? If so, remember it
975 and skip it */
976 if (PyUnicode_READ_CHAR(tmp, inumeric_chars) == '-') {
977 sign_char = '-';
978 ++prefix;
979 ++leading_chars_to_skip;
980 }
981
982 /* Skip over the leading chars (0x, 0b, etc.) */
983 n_digits -= leading_chars_to_skip;
984 inumeric_chars += leading_chars_to_skip;
985 }
986
987 /* Determine the grouping, separator, and decimal point, if any. */
Victor Stinner41a863c2012-02-24 00:37:51 +0100988 if (get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
Eric V. Smith89e1b1a2016-09-09 23:06:47 -0400989 format->thousands_separators,
Victor Stinner41a863c2012-02-24 00:37:51 +0100990 &locale) == -1)
991 goto done;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200992
993 /* Calculate how much memory we'll need. */
Andy Lesterad0c7752020-03-07 11:29:10 -0600994 n_total = calc_number_widths(&spec, n_prefix, sign_char, inumeric_chars,
Victor Stinner41a863c2012-02-24 00:37:51 +0100995 inumeric_chars + n_digits, n_remainder, 0,
996 &locale, format, &maxchar);
Victor Stinner59423e32018-11-26 13:40:01 +0100997 if (n_total == -1) {
998 goto done;
999 }
Victor Stinnera4ac6002012-01-21 15:50:49 +01001000
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001001 /* Allocate the memory. */
Victor Stinnerd3f08822012-05-29 12:57:52 +02001002 if (_PyUnicodeWriter_Prepare(writer, n_total, maxchar) == -1)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001003 goto done;
1004
1005 /* Populate the memory. */
Victor Stinnerd3f08822012-05-29 12:57:52 +02001006 result = fill_number(writer, &spec,
Andy Lesterad0c7752020-03-07 11:29:10 -06001007 tmp, inumeric_chars,
Eric V. Smith2ea97122014-04-14 11:55:10 -04001008 tmp, prefix, format->fill_char,
Victor Stinnerd3f08822012-05-29 12:57:52 +02001009 &locale, format->type == 'X');
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001010
1011done:
1012 Py_XDECREF(tmp);
Victor Stinner41a863c2012-02-24 00:37:51 +01001013 free_locale_info(&locale);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001014 return result;
1015}
1016
1017/************************************************************************/
1018/*********** float formatting *******************************************/
1019/************************************************************************/
1020
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001021/* much of this is taken from unicodeobject.c */
Victor Stinnerd3f08822012-05-29 12:57:52 +02001022static int
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001023format_float_internal(PyObject *value,
Victor Stinnerd3f08822012-05-29 12:57:52 +02001024 const InternalFormatSpec *format,
1025 _PyUnicodeWriter *writer)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001026{
1027 char *buf = NULL; /* buffer returned from PyOS_double_to_string */
1028 Py_ssize_t n_digits;
1029 Py_ssize_t n_remainder;
1030 Py_ssize_t n_total;
1031 int has_decimal;
1032 double val;
Victor Stinner76d38502013-06-24 23:34:15 +02001033 int precision, default_precision = 6;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001034 Py_UCS4 type = format->type;
1035 int add_pct = 0;
1036 Py_ssize_t index;
1037 NumberFieldWidths spec;
1038 int flags = 0;
Victor Stinnerd3f08822012-05-29 12:57:52 +02001039 int result = -1;
Amaury Forgeot d'Arccd27df32012-01-23 22:42:19 +01001040 Py_UCS4 maxchar = 127;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001041 Py_UCS4 sign_char = '\0';
1042 int float_type; /* Used to see if we have a nan, inf, or regular float. */
1043 PyObject *unicode_tmp = NULL;
1044
1045 /* Locale settings, either from the actual locale or
1046 from a hard-code pseudo-locale */
Victor Stinner02e6bf72018-11-20 16:20:16 +01001047 LocaleInfo locale = LocaleInfo_STATIC_INIT;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001048
Victor Stinner2f084ec2013-06-23 14:54:30 +02001049 if (format->precision > INT_MAX) {
1050 PyErr_SetString(PyExc_ValueError, "precision too big");
1051 goto done;
1052 }
1053 precision = (int)format->precision;
1054
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001055 if (format->alternate)
1056 flags |= Py_DTSF_ALT;
1057
1058 if (type == '\0') {
1059 /* Omitted type specifier. Behaves in the same way as repr(x)
1060 and str(x) if no precision is given, else like 'g', but with
1061 at least one digit after the decimal point. */
1062 flags |= Py_DTSF_ADD_DOT_0;
1063 type = 'r';
1064 default_precision = 0;
1065 }
1066
1067 if (type == 'n')
1068 /* 'n' is the same as 'g', except for the locale used to
1069 format the result. We take care of that later. */
1070 type = 'g';
1071
1072 val = PyFloat_AsDouble(value);
1073 if (val == -1.0 && PyErr_Occurred())
1074 goto done;
1075
1076 if (type == '%') {
1077 type = 'f';
1078 val *= 100;
1079 add_pct = 1;
1080 }
1081
1082 if (precision < 0)
1083 precision = default_precision;
1084 else if (type == 'r')
1085 type = 'g';
1086
Martin Panter4c359642016-05-08 13:53:41 +00001087 /* Cast "type", because if we're in unicode we need to pass an
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001088 8-bit char. This is safe, because we've restricted what "type"
1089 can be. */
1090 buf = PyOS_double_to_string(val, (char)type, precision, flags,
1091 &float_type);
1092 if (buf == NULL)
1093 goto done;
1094 n_digits = strlen(buf);
1095
1096 if (add_pct) {
1097 /* We know that buf has a trailing zero (since we just called
1098 strlen() on it), and we don't use that fact any more. So we
1099 can just write over the trailing zero. */
1100 buf[n_digits] = '%';
1101 n_digits += 1;
1102 }
1103
Victor Stinnerd3f08822012-05-29 12:57:52 +02001104 if (format->sign != '+' && format->sign != ' '
1105 && format->width == -1
1106 && format->type != 'n'
1107 && !format->thousands_separators)
1108 {
1109 /* Fast path */
Victor Stinner4a587072013-11-19 12:54:53 +01001110 result = _PyUnicodeWriter_WriteASCIIString(writer, buf, n_digits);
1111 PyMem_Free(buf);
Victor Stinnerd3f08822012-05-29 12:57:52 +02001112 return result;
1113 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001114
Victor Stinner4a587072013-11-19 12:54:53 +01001115 /* Since there is no unicode version of PyOS_double_to_string,
1116 just use the 8 bit version and then convert to unicode. */
1117 unicode_tmp = _PyUnicode_FromASCII(buf, n_digits);
1118 PyMem_Free(buf);
1119 if (unicode_tmp == NULL)
1120 goto done;
1121
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001122 /* Is a sign character present in the output? If so, remember it
1123 and skip it */
Victor Stinnerd3f08822012-05-29 12:57:52 +02001124 index = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001125 if (PyUnicode_READ_CHAR(unicode_tmp, index) == '-') {
1126 sign_char = '-';
1127 ++index;
1128 --n_digits;
1129 }
1130
1131 /* Determine if we have any "remainder" (after the digits, might include
1132 decimal or exponent or both (or neither)) */
1133 parse_number(unicode_tmp, index, index + n_digits, &n_remainder, &has_decimal);
1134
1135 /* Determine the grouping, separator, and decimal point, if any. */
Victor Stinner41a863c2012-02-24 00:37:51 +01001136 if (get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
Eric V. Smith89e1b1a2016-09-09 23:06:47 -04001137 format->thousands_separators,
Victor Stinner41a863c2012-02-24 00:37:51 +01001138 &locale) == -1)
1139 goto done;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001140
1141 /* Calculate how much memory we'll need. */
Andy Lesterad0c7752020-03-07 11:29:10 -06001142 n_total = calc_number_widths(&spec, 0, sign_char, index,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001143 index + n_digits, n_remainder, has_decimal,
Victor Stinner41a863c2012-02-24 00:37:51 +01001144 &locale, format, &maxchar);
Victor Stinner59423e32018-11-26 13:40:01 +01001145 if (n_total == -1) {
1146 goto done;
1147 }
Victor Stinnera4ac6002012-01-21 15:50:49 +01001148
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001149 /* Allocate the memory. */
Victor Stinnerd3f08822012-05-29 12:57:52 +02001150 if (_PyUnicodeWriter_Prepare(writer, n_total, maxchar) == -1)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001151 goto done;
1152
1153 /* Populate the memory. */
Victor Stinnerd3f08822012-05-29 12:57:52 +02001154 result = fill_number(writer, &spec,
Andy Lesterad0c7752020-03-07 11:29:10 -06001155 unicode_tmp, index,
Eric V. Smith2ea97122014-04-14 11:55:10 -04001156 NULL, 0, format->fill_char,
Victor Stinnerd3f08822012-05-29 12:57:52 +02001157 &locale, 0);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001158
1159done:
Stefan Krahd9c1bf72012-09-06 13:02:46 +02001160 Py_XDECREF(unicode_tmp);
Victor Stinner41a863c2012-02-24 00:37:51 +01001161 free_locale_info(&locale);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001162 return result;
1163}
1164
1165/************************************************************************/
1166/*********** complex formatting *****************************************/
1167/************************************************************************/
1168
Victor Stinnerd3f08822012-05-29 12:57:52 +02001169static int
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001170format_complex_internal(PyObject *value,
Victor Stinnerd3f08822012-05-29 12:57:52 +02001171 const InternalFormatSpec *format,
1172 _PyUnicodeWriter *writer)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001173{
1174 double re;
1175 double im;
1176 char *re_buf = NULL; /* buffer returned from PyOS_double_to_string */
1177 char *im_buf = NULL; /* buffer returned from PyOS_double_to_string */
1178
1179 InternalFormatSpec tmp_format = *format;
1180 Py_ssize_t n_re_digits;
1181 Py_ssize_t n_im_digits;
1182 Py_ssize_t n_re_remainder;
1183 Py_ssize_t n_im_remainder;
1184 Py_ssize_t n_re_total;
1185 Py_ssize_t n_im_total;
1186 int re_has_decimal;
1187 int im_has_decimal;
Victor Stinner76d38502013-06-24 23:34:15 +02001188 int precision, default_precision = 6;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001189 Py_UCS4 type = format->type;
1190 Py_ssize_t i_re;
1191 Py_ssize_t i_im;
1192 NumberFieldWidths re_spec;
1193 NumberFieldWidths im_spec;
1194 int flags = 0;
Victor Stinnerd3f08822012-05-29 12:57:52 +02001195 int result = -1;
Amaury Forgeot d'Arccd27df32012-01-23 22:42:19 +01001196 Py_UCS4 maxchar = 127;
Victor Stinnerd3f08822012-05-29 12:57:52 +02001197 enum PyUnicode_Kind rkind;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001198 void *rdata;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001199 Py_UCS4 re_sign_char = '\0';
1200 Py_UCS4 im_sign_char = '\0';
1201 int re_float_type; /* Used to see if we have a nan, inf, or regular float. */
1202 int im_float_type;
1203 int add_parens = 0;
1204 int skip_re = 0;
1205 Py_ssize_t lpad;
1206 Py_ssize_t rpad;
1207 Py_ssize_t total;
1208 PyObject *re_unicode_tmp = NULL;
1209 PyObject *im_unicode_tmp = NULL;
1210
1211 /* Locale settings, either from the actual locale or
1212 from a hard-code pseudo-locale */
Victor Stinner02e6bf72018-11-20 16:20:16 +01001213 LocaleInfo locale = LocaleInfo_STATIC_INIT;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001214
Victor Stinner2f084ec2013-06-23 14:54:30 +02001215 if (format->precision > INT_MAX) {
1216 PyErr_SetString(PyExc_ValueError, "precision too big");
1217 goto done;
1218 }
1219 precision = (int)format->precision;
1220
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001221 /* Zero padding is not allowed. */
1222 if (format->fill_char == '0') {
1223 PyErr_SetString(PyExc_ValueError,
1224 "Zero padding is not allowed in complex format "
1225 "specifier");
1226 goto done;
1227 }
1228
1229 /* Neither is '=' alignment . */
1230 if (format->align == '=') {
1231 PyErr_SetString(PyExc_ValueError,
1232 "'=' alignment flag is not allowed in complex format "
1233 "specifier");
1234 goto done;
1235 }
1236
1237 re = PyComplex_RealAsDouble(value);
1238 if (re == -1.0 && PyErr_Occurred())
1239 goto done;
1240 im = PyComplex_ImagAsDouble(value);
1241 if (im == -1.0 && PyErr_Occurred())
1242 goto done;
1243
1244 if (format->alternate)
1245 flags |= Py_DTSF_ALT;
1246
1247 if (type == '\0') {
1248 /* Omitted type specifier. Should be like str(self). */
1249 type = 'r';
1250 default_precision = 0;
1251 if (re == 0.0 && copysign(1.0, re) == 1.0)
1252 skip_re = 1;
1253 else
1254 add_parens = 1;
1255 }
1256
1257 if (type == 'n')
1258 /* 'n' is the same as 'g', except for the locale used to
1259 format the result. We take care of that later. */
1260 type = 'g';
1261
1262 if (precision < 0)
1263 precision = default_precision;
1264 else if (type == 'r')
1265 type = 'g';
1266
Martin Panter4c359642016-05-08 13:53:41 +00001267 /* Cast "type", because if we're in unicode we need to pass an
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001268 8-bit char. This is safe, because we've restricted what "type"
1269 can be. */
1270 re_buf = PyOS_double_to_string(re, (char)type, precision, flags,
1271 &re_float_type);
1272 if (re_buf == NULL)
1273 goto done;
1274 im_buf = PyOS_double_to_string(im, (char)type, precision, flags,
1275 &im_float_type);
1276 if (im_buf == NULL)
1277 goto done;
1278
1279 n_re_digits = strlen(re_buf);
1280 n_im_digits = strlen(im_buf);
1281
1282 /* Since there is no unicode version of PyOS_double_to_string,
1283 just use the 8 bit version and then convert to unicode. */
Victor Stinnerd3f08822012-05-29 12:57:52 +02001284 re_unicode_tmp = _PyUnicode_FromASCII(re_buf, n_re_digits);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001285 if (re_unicode_tmp == NULL)
1286 goto done;
1287 i_re = 0;
1288
Victor Stinnerd3f08822012-05-29 12:57:52 +02001289 im_unicode_tmp = _PyUnicode_FromASCII(im_buf, n_im_digits);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001290 if (im_unicode_tmp == NULL)
1291 goto done;
1292 i_im = 0;
1293
1294 /* Is a sign character present in the output? If so, remember it
1295 and skip it */
1296 if (PyUnicode_READ_CHAR(re_unicode_tmp, i_re) == '-') {
1297 re_sign_char = '-';
1298 ++i_re;
1299 --n_re_digits;
1300 }
1301 if (PyUnicode_READ_CHAR(im_unicode_tmp, i_im) == '-') {
1302 im_sign_char = '-';
1303 ++i_im;
1304 --n_im_digits;
1305 }
1306
1307 /* Determine if we have any "remainder" (after the digits, might include
1308 decimal or exponent or both (or neither)) */
Victor Stinnerafbaa202011-09-28 21:50:16 +02001309 parse_number(re_unicode_tmp, i_re, i_re + n_re_digits,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001310 &n_re_remainder, &re_has_decimal);
Victor Stinnerafbaa202011-09-28 21:50:16 +02001311 parse_number(im_unicode_tmp, i_im, i_im + n_im_digits,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001312 &n_im_remainder, &im_has_decimal);
1313
1314 /* Determine the grouping, separator, and decimal point, if any. */
Victor Stinner41a863c2012-02-24 00:37:51 +01001315 if (get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
Eric V. Smith89e1b1a2016-09-09 23:06:47 -04001316 format->thousands_separators,
Victor Stinner41a863c2012-02-24 00:37:51 +01001317 &locale) == -1)
1318 goto done;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001319
1320 /* Turn off any padding. We'll do it later after we've composed
1321 the numbers without padding. */
1322 tmp_format.fill_char = '\0';
1323 tmp_format.align = '<';
1324 tmp_format.width = -1;
1325
1326 /* Calculate how much memory we'll need. */
Andy Lesterad0c7752020-03-07 11:29:10 -06001327 n_re_total = calc_number_widths(&re_spec, 0, re_sign_char,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001328 i_re, i_re + n_re_digits, n_re_remainder,
Victor Stinner41a863c2012-02-24 00:37:51 +01001329 re_has_decimal, &locale, &tmp_format,
1330 &maxchar);
Victor Stinner59423e32018-11-26 13:40:01 +01001331 if (n_re_total == -1) {
1332 goto done;
1333 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001334
1335 /* Same formatting, but always include a sign, unless the real part is
1336 * going to be omitted, in which case we use whatever sign convention was
1337 * requested by the original format. */
1338 if (!skip_re)
1339 tmp_format.sign = '+';
Andy Lesterad0c7752020-03-07 11:29:10 -06001340 n_im_total = calc_number_widths(&im_spec, 0, im_sign_char,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001341 i_im, i_im + n_im_digits, n_im_remainder,
Victor Stinner41a863c2012-02-24 00:37:51 +01001342 im_has_decimal, &locale, &tmp_format,
1343 &maxchar);
Victor Stinner59423e32018-11-26 13:40:01 +01001344 if (n_im_total == -1) {
1345 goto done;
1346 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001347
1348 if (skip_re)
1349 n_re_total = 0;
1350
1351 /* Add 1 for the 'j', and optionally 2 for parens. */
1352 calc_padding(n_re_total + n_im_total + 1 + add_parens * 2,
1353 format->width, format->align, &lpad, &rpad, &total);
1354
Victor Stinner41a863c2012-02-24 00:37:51 +01001355 if (lpad || rpad)
Victor Stinnera4ac6002012-01-21 15:50:49 +01001356 maxchar = Py_MAX(maxchar, format->fill_char);
1357
Victor Stinnerd3f08822012-05-29 12:57:52 +02001358 if (_PyUnicodeWriter_Prepare(writer, total, maxchar) == -1)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001359 goto done;
Victor Stinnerd3f08822012-05-29 12:57:52 +02001360 rkind = writer->kind;
1361 rdata = writer->data;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001362
1363 /* Populate the memory. First, the padding. */
Victor Stinnerd3f08822012-05-29 12:57:52 +02001364 result = fill_padding(writer,
1365 n_re_total + n_im_total + 1 + add_parens * 2,
Eric V. Smith2ea97122014-04-14 11:55:10 -04001366 format->fill_char, lpad, rpad);
Victor Stinnerd3f08822012-05-29 12:57:52 +02001367 if (result == -1)
1368 goto done;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001369
Victor Stinnerd3f08822012-05-29 12:57:52 +02001370 if (add_parens) {
1371 PyUnicode_WRITE(rkind, rdata, writer->pos, '(');
1372 writer->pos++;
1373 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001374
1375 if (!skip_re) {
Victor Stinnerd3f08822012-05-29 12:57:52 +02001376 result = fill_number(writer, &re_spec,
Andy Lesterad0c7752020-03-07 11:29:10 -06001377 re_unicode_tmp, i_re,
Victor Stinnerd3f08822012-05-29 12:57:52 +02001378 NULL, 0,
1379 0,
1380 &locale, 0);
1381 if (result == -1)
Victor Stinnerafbaa202011-09-28 21:50:16 +02001382 goto done;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001383 }
Victor Stinnerd3f08822012-05-29 12:57:52 +02001384 result = fill_number(writer, &im_spec,
Andy Lesterad0c7752020-03-07 11:29:10 -06001385 im_unicode_tmp, i_im,
Victor Stinnerd3f08822012-05-29 12:57:52 +02001386 NULL, 0,
1387 0,
1388 &locale, 0);
1389 if (result == -1)
Victor Stinnerafbaa202011-09-28 21:50:16 +02001390 goto done;
Victor Stinnerd3f08822012-05-29 12:57:52 +02001391 PyUnicode_WRITE(rkind, rdata, writer->pos, 'j');
1392 writer->pos++;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001393
Victor Stinnerd3f08822012-05-29 12:57:52 +02001394 if (add_parens) {
1395 PyUnicode_WRITE(rkind, rdata, writer->pos, ')');
1396 writer->pos++;
1397 }
1398
1399 writer->pos += rpad;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001400
1401done:
1402 PyMem_Free(re_buf);
1403 PyMem_Free(im_buf);
1404 Py_XDECREF(re_unicode_tmp);
1405 Py_XDECREF(im_unicode_tmp);
Victor Stinner41a863c2012-02-24 00:37:51 +01001406 free_locale_info(&locale);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001407 return result;
1408}
1409
1410/************************************************************************/
1411/*********** built in formatters ****************************************/
1412/************************************************************************/
doko@ubuntu.com39378f72012-06-21 12:12:20 +02001413static int
Victor Stinnerd3f08822012-05-29 12:57:52 +02001414format_obj(PyObject *obj, _PyUnicodeWriter *writer)
1415{
1416 PyObject *str;
1417 int err;
1418
1419 str = PyObject_Str(obj);
1420 if (str == NULL)
1421 return -1;
1422 err = _PyUnicodeWriter_WriteStr(writer, str);
1423 Py_DECREF(str);
1424 return err;
1425}
1426
1427int
1428_PyUnicode_FormatAdvancedWriter(_PyUnicodeWriter *writer,
1429 PyObject *obj,
1430 PyObject *format_spec,
1431 Py_ssize_t start, Py_ssize_t end)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001432{
1433 InternalFormatSpec format;
Victor Stinnerd3f08822012-05-29 12:57:52 +02001434
1435 assert(PyUnicode_Check(obj));
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001436
1437 /* check for the special case of zero length format spec, make
1438 it equivalent to str(obj) */
Victor Stinnerd3f08822012-05-29 12:57:52 +02001439 if (start == end) {
1440 if (PyUnicode_CheckExact(obj))
1441 return _PyUnicodeWriter_WriteStr(writer, obj);
1442 else
1443 return format_obj(obj, writer);
1444 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001445
1446 /* parse the format_spec */
1447 if (!parse_internal_render_format_spec(format_spec, start, end,
1448 &format, 's', '<'))
Victor Stinnerd3f08822012-05-29 12:57:52 +02001449 return -1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001450
1451 /* type conversion? */
1452 switch (format.type) {
1453 case 's':
1454 /* no type conversion needed, already a string. do the formatting */
Victor Stinnerd3f08822012-05-29 12:57:52 +02001455 return format_string_internal(obj, &format, writer);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001456 default:
1457 /* unknown */
Victor Stinnera102ed72020-02-07 02:24:48 +01001458 unknown_presentation_type(format.type, Py_TYPE(obj)->tp_name);
Victor Stinnerd3f08822012-05-29 12:57:52 +02001459 return -1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001460 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001461}
1462
Victor Stinnerd3f08822012-05-29 12:57:52 +02001463int
1464_PyLong_FormatAdvancedWriter(_PyUnicodeWriter *writer,
1465 PyObject *obj,
1466 PyObject *format_spec,
1467 Py_ssize_t start, Py_ssize_t end)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001468{
Andy Lester3fe91172020-03-01 15:26:43 -06001469 PyObject *tmp = NULL;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001470 InternalFormatSpec format;
Victor Stinnerd3f08822012-05-29 12:57:52 +02001471 int result = -1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001472
1473 /* check for the special case of zero length format spec, make
1474 it equivalent to str(obj) */
1475 if (start == end) {
Victor Stinnerd3f08822012-05-29 12:57:52 +02001476 if (PyLong_CheckExact(obj))
1477 return _PyLong_FormatWriter(writer, obj, 10, 0);
1478 else
1479 return format_obj(obj, writer);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001480 }
1481
1482 /* parse the format_spec */
1483 if (!parse_internal_render_format_spec(format_spec, start, end,
1484 &format, 'd', '>'))
1485 goto done;
1486
1487 /* type conversion? */
1488 switch (format.type) {
1489 case 'b':
1490 case 'c':
1491 case 'd':
1492 case 'o':
1493 case 'x':
1494 case 'X':
1495 case 'n':
Serhiy Storchaka95949422013-08-27 19:40:23 +03001496 /* no type conversion needed, already an int. do the formatting */
Victor Stinnerd3f08822012-05-29 12:57:52 +02001497 result = format_long_internal(obj, &format, writer);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001498 break;
1499
1500 case 'e':
1501 case 'E':
1502 case 'f':
1503 case 'F':
1504 case 'g':
1505 case 'G':
1506 case '%':
1507 /* convert to float */
1508 tmp = PyNumber_Float(obj);
1509 if (tmp == NULL)
1510 goto done;
Victor Stinnerd3f08822012-05-29 12:57:52 +02001511 result = format_float_internal(tmp, &format, writer);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001512 break;
1513
1514 default:
1515 /* unknown */
Victor Stinnera102ed72020-02-07 02:24:48 +01001516 unknown_presentation_type(format.type, Py_TYPE(obj)->tp_name);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001517 goto done;
1518 }
1519
1520done:
1521 Py_XDECREF(tmp);
1522 return result;
1523}
1524
Victor Stinnerd3f08822012-05-29 12:57:52 +02001525int
1526_PyFloat_FormatAdvancedWriter(_PyUnicodeWriter *writer,
1527 PyObject *obj,
1528 PyObject *format_spec,
1529 Py_ssize_t start, Py_ssize_t end)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001530{
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001531 InternalFormatSpec format;
1532
1533 /* check for the special case of zero length format spec, make
1534 it equivalent to str(obj) */
Victor Stinnerd3f08822012-05-29 12:57:52 +02001535 if (start == end)
1536 return format_obj(obj, writer);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001537
1538 /* parse the format_spec */
1539 if (!parse_internal_render_format_spec(format_spec, start, end,
1540 &format, '\0', '>'))
Victor Stinnerd3f08822012-05-29 12:57:52 +02001541 return -1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001542
1543 /* type conversion? */
1544 switch (format.type) {
1545 case '\0': /* No format code: like 'g', but with at least one decimal. */
1546 case 'e':
1547 case 'E':
1548 case 'f':
1549 case 'F':
1550 case 'g':
1551 case 'G':
1552 case 'n':
1553 case '%':
1554 /* no conversion, already a float. do the formatting */
Victor Stinnerd3f08822012-05-29 12:57:52 +02001555 return format_float_internal(obj, &format, writer);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001556
1557 default:
1558 /* unknown */
Victor Stinnera102ed72020-02-07 02:24:48 +01001559 unknown_presentation_type(format.type, Py_TYPE(obj)->tp_name);
Victor Stinnerd3f08822012-05-29 12:57:52 +02001560 return -1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001561 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001562}
1563
Victor Stinnerd3f08822012-05-29 12:57:52 +02001564int
1565_PyComplex_FormatAdvancedWriter(_PyUnicodeWriter *writer,
1566 PyObject *obj,
1567 PyObject *format_spec,
1568 Py_ssize_t start, Py_ssize_t end)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001569{
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001570 InternalFormatSpec format;
1571
1572 /* check for the special case of zero length format spec, make
1573 it equivalent to str(obj) */
Victor Stinnerd3f08822012-05-29 12:57:52 +02001574 if (start == end)
1575 return format_obj(obj, writer);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001576
1577 /* parse the format_spec */
1578 if (!parse_internal_render_format_spec(format_spec, start, end,
1579 &format, '\0', '>'))
Victor Stinnerd3f08822012-05-29 12:57:52 +02001580 return -1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001581
1582 /* type conversion? */
1583 switch (format.type) {
1584 case '\0': /* No format code: like 'g', but with at least one decimal. */
1585 case 'e':
1586 case 'E':
1587 case 'f':
1588 case 'F':
1589 case 'g':
1590 case 'G':
1591 case 'n':
1592 /* no conversion, already a complex. do the formatting */
Victor Stinnerd3f08822012-05-29 12:57:52 +02001593 return format_complex_internal(obj, &format, writer);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001594
1595 default:
1596 /* unknown */
Victor Stinnera102ed72020-02-07 02:24:48 +01001597 unknown_presentation_type(format.type, Py_TYPE(obj)->tp_name);
Victor Stinnerd3f08822012-05-29 12:57:52 +02001598 return -1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001599 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001600}