blob: 3e9e9ba08602cd0a09eb9c1c049ad1aeca887043 [file] [log] [blame]
/* implements the unicode (as opposed to string) version of the
   built-in formatters for string, int, float.  that is, the versions
   of int.__format__, etc., that take and return unicode objects */
4
5#include "Python.h"
Victor Stinner02e6bf72018-11-20 16:20:16 +01006#include "pycore_fileutils.h"
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02007#include <locale.h>
8
9/* Raises an exception about an unknown presentation type for this
10 * type. */
11
12static void
13unknown_presentation_type(Py_UCS4 presentation_type,
14 const char* type_name)
15{
16 /* %c might be out-of-range, hence the two cases. */
17 if (presentation_type > 32 && presentation_type < 128)
18 PyErr_Format(PyExc_ValueError,
19 "Unknown format code '%c' "
20 "for object of type '%.200s'",
21 (char)presentation_type,
22 type_name);
23 else
24 PyErr_Format(PyExc_ValueError,
25 "Unknown format code '\\x%x' "
26 "for object of type '%.200s'",
27 (unsigned int)presentation_type,
28 type_name);
29}
30
31static void
Benjamin Petersoncbda8fc2018-10-01 21:54:39 -070032invalid_thousands_separator_type(char specifier, Py_UCS4 presentation_type)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020033{
Benjamin Petersoncbda8fc2018-10-01 21:54:39 -070034 assert(specifier == ',' || specifier == '_');
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020035 if (presentation_type > 32 && presentation_type < 128)
36 PyErr_Format(PyExc_ValueError,
Benjamin Petersoncbda8fc2018-10-01 21:54:39 -070037 "Cannot specify '%c' with '%c'.",
38 specifier, (char)presentation_type);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020039 else
40 PyErr_Format(PyExc_ValueError,
Benjamin Petersoncbda8fc2018-10-01 21:54:39 -070041 "Cannot specify '%c' with '\\x%x'.",
42 specifier, (unsigned int)presentation_type);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020043}
44
Eric V. Smith89e1b1a2016-09-09 23:06:47 -040045static void
Benjamin Petersoneb0dfa92016-09-09 20:14:05 -070046invalid_comma_and_underscore(void)
Eric V. Smith89e1b1a2016-09-09 23:06:47 -040047{
48 PyErr_Format(PyExc_ValueError, "Cannot specify both ',' and '_'.");
49}
50
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020051/*
52 get_integer consumes 0 or more decimal digit characters from an
53 input string, updates *result with the corresponding positive
54 integer, and returns the number of digits consumed.
55
56 returns -1 on error.
57*/
58static int
Serhiy Storchaka1f932612016-08-29 15:57:26 +030059get_integer(PyObject *str, Py_ssize_t *ppos, Py_ssize_t end,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020060 Py_ssize_t *result)
61{
Serhiy Storchaka1f932612016-08-29 15:57:26 +030062 Py_ssize_t accumulator, digitval, pos = *ppos;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020063 int numdigits;
Serhiy Storchaka1f932612016-08-29 15:57:26 +030064 int kind = PyUnicode_KIND(str);
65 void *data = PyUnicode_DATA(str);
66
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020067 accumulator = numdigits = 0;
Serhiy Storchaka1f932612016-08-29 15:57:26 +030068 for (; pos < end; pos++, numdigits++) {
69 digitval = Py_UNICODE_TODECIMAL(PyUnicode_READ(kind, data, pos));
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020070 if (digitval < 0)
71 break;
72 /*
Mark Dickinson47862d42011-12-01 15:27:04 +000073 Detect possible overflow before it happens:
74
75 accumulator * 10 + digitval > PY_SSIZE_T_MAX if and only if
76 accumulator > (PY_SSIZE_T_MAX - digitval) / 10.
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020077 */
Mark Dickinson47862d42011-12-01 15:27:04 +000078 if (accumulator > (PY_SSIZE_T_MAX - digitval) / 10) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020079 PyErr_Format(PyExc_ValueError,
80 "Too many decimal digits in format string");
Serhiy Storchaka1f932612016-08-29 15:57:26 +030081 *ppos = pos;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020082 return -1;
83 }
Mark Dickinson47862d42011-12-01 15:27:04 +000084 accumulator = accumulator * 10 + digitval;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020085 }
Serhiy Storchaka1f932612016-08-29 15:57:26 +030086 *ppos = pos;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020087 *result = accumulator;
88 return numdigits;
89}
90
91/************************************************************************/
92/*********** standard format specifier parsing **************************/
93/************************************************************************/
94
95/* returns true if this character is a specifier alignment token */
96Py_LOCAL_INLINE(int)
97is_alignment_token(Py_UCS4 c)
98{
99 switch (c) {
100 case '<': case '>': case '=': case '^':
101 return 1;
102 default:
103 return 0;
104 }
105}
106
107/* returns true if this character is a sign element */
108Py_LOCAL_INLINE(int)
109is_sign_element(Py_UCS4 c)
110{
111 switch (c) {
112 case ' ': case '+': case '-':
113 return 1;
114 default:
115 return 0;
116 }
117}
Eric Smith8c663262007-08-25 02:26:07 +0000118
/* Locale type codes.  LT_NO_LOCALE must be zero, because the parsed
   value is tested for truth.  LT_DEFAULT_LOCALE and LT_UNDERSCORE_LOCALE
   are numerically equal to the separator character they stand for, so
   the enum value can be reported directly in error messages. */
enum LocaleType {
    /* No thousands separator requested. */
    LT_NO_LOCALE = 0,
    /* ',' separator. */
    LT_DEFAULT_LOCALE = ',',
    /* '_' separator. */
    LT_UNDERSCORE_LOCALE = '_',
    /* '_' separator for bin/oct/hex, which group by four digits. */
    LT_UNDER_FOUR_LOCALE,
    /* Separator and grouping taken from localeconv(). */
    LT_CURRENT_LOCALE
};
Eric Smith4a7d76d2008-05-30 18:10:19 +0000127
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200128typedef struct {
129 Py_UCS4 fill_char;
130 Py_UCS4 align;
131 int alternate;
132 Py_UCS4 sign;
133 Py_ssize_t width;
Benjamin Peterson995026a2016-09-13 22:46:15 -0700134 enum LocaleType thousands_separators;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200135 Py_ssize_t precision;
136 Py_UCS4 type;
137} InternalFormatSpec;
Eric Smith4a7d76d2008-05-30 18:10:19 +0000138
#if 0
/* Occasionally useful for debugging.  Normally compiled out.
   Dumps every field of *format to stdout, one per line. */
static void
DEBUG_PRINT_FORMAT_SPEC(InternalFormatSpec *format)
{
    /* The Py_UCS4 and enum fields are cast to int so that the argument
       types match the %d / %c conversions exactly. */
    printf("internal format spec: fill_char %d\n", (int)format->fill_char);
    printf("internal format spec: align %d\n", (int)format->align);
    printf("internal format spec: alternate %d\n", format->alternate);
    printf("internal format spec: sign %d\n", (int)format->sign);
    printf("internal format spec: width %zd\n", format->width);
    printf("internal format spec: thousands_separators %d\n",
           (int)format->thousands_separators);
    printf("internal format spec: precision %zd\n", format->precision);
    printf("internal format spec: type %c\n", (int)format->type);
    printf("\n");
}
#endif
156
157
158/*
159 ptr points to the start of the format_spec, end points just past its end.
160 fills in format with the parsed information.
161 returns 1 on success, 0 on failure.
162 if failure, sets the exception
163*/
164static int
165parse_internal_render_format_spec(PyObject *format_spec,
166 Py_ssize_t start, Py_ssize_t end,
167 InternalFormatSpec *format,
168 char default_type,
169 char default_align)
170{
171 Py_ssize_t pos = start;
Serhiy Storchaka1f932612016-08-29 15:57:26 +0300172 int kind = PyUnicode_KIND(format_spec);
173 void *data = PyUnicode_DATA(format_spec);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200174 /* end-pos is used throughout this code to specify the length of
175 the input string */
Serhiy Storchaka1f932612016-08-29 15:57:26 +0300176#define READ_spec(index) PyUnicode_READ(kind, data, index)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200177
178 Py_ssize_t consumed;
179 int align_specified = 0;
Eric V. Smith2ea97122014-04-14 11:55:10 -0400180 int fill_char_specified = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200181
Eric V. Smith2ea97122014-04-14 11:55:10 -0400182 format->fill_char = ' ';
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200183 format->align = default_align;
184 format->alternate = 0;
185 format->sign = '\0';
186 format->width = -1;
Benjamin Peterson995026a2016-09-13 22:46:15 -0700187 format->thousands_separators = LT_NO_LOCALE;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200188 format->precision = -1;
189 format->type = default_type;
190
191 /* If the second char is an alignment token,
192 then parse the fill char */
193 if (end-pos >= 2 && is_alignment_token(READ_spec(pos+1))) {
194 format->align = READ_spec(pos+1);
195 format->fill_char = READ_spec(pos);
Eric V. Smith2ea97122014-04-14 11:55:10 -0400196 fill_char_specified = 1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200197 align_specified = 1;
198 pos += 2;
199 }
200 else if (end-pos >= 1 && is_alignment_token(READ_spec(pos))) {
201 format->align = READ_spec(pos);
202 align_specified = 1;
203 ++pos;
204 }
205
206 /* Parse the various sign options */
207 if (end-pos >= 1 && is_sign_element(READ_spec(pos))) {
208 format->sign = READ_spec(pos);
209 ++pos;
210 }
211
212 /* If the next character is #, we're in alternate mode. This only
213 applies to integers. */
214 if (end-pos >= 1 && READ_spec(pos) == '#') {
215 format->alternate = 1;
216 ++pos;
217 }
218
219 /* The special case for 0-padding (backwards compat) */
Eric V. Smith2ea97122014-04-14 11:55:10 -0400220 if (!fill_char_specified && end-pos >= 1 && READ_spec(pos) == '0') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200221 format->fill_char = '0';
222 if (!align_specified) {
223 format->align = '=';
224 }
225 ++pos;
226 }
227
228 consumed = get_integer(format_spec, &pos, end, &format->width);
229 if (consumed == -1)
230 /* Overflow error. Exception already set. */
231 return 0;
232
233 /* If consumed is 0, we didn't consume any characters for the
234 width. In that case, reset the width to -1, because
235 get_integer() will have set it to zero. -1 is how we record
236 that the width wasn't specified. */
237 if (consumed == 0)
238 format->width = -1;
239
240 /* Comma signifies add thousands separators */
241 if (end-pos && READ_spec(pos) == ',') {
Eric V. Smith89e1b1a2016-09-09 23:06:47 -0400242 format->thousands_separators = LT_DEFAULT_LOCALE;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200243 ++pos;
244 }
Eric V. Smith89e1b1a2016-09-09 23:06:47 -0400245 /* Underscore signifies add thousands separators */
246 if (end-pos && READ_spec(pos) == '_') {
Benjamin Peterson995026a2016-09-13 22:46:15 -0700247 if (format->thousands_separators != LT_NO_LOCALE) {
Eric V. Smith89e1b1a2016-09-09 23:06:47 -0400248 invalid_comma_and_underscore();
249 return 0;
250 }
251 format->thousands_separators = LT_UNDERSCORE_LOCALE;
252 ++pos;
253 }
254 if (end-pos && READ_spec(pos) == ',') {
255 invalid_comma_and_underscore();
256 return 0;
257 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200258
259 /* Parse field precision */
260 if (end-pos && READ_spec(pos) == '.') {
261 ++pos;
262
263 consumed = get_integer(format_spec, &pos, end, &format->precision);
264 if (consumed == -1)
265 /* Overflow error. Exception already set. */
266 return 0;
267
268 /* Not having a precision after a dot is an error. */
269 if (consumed == 0) {
270 PyErr_Format(PyExc_ValueError,
271 "Format specifier missing precision");
272 return 0;
273 }
274
275 }
276
277 /* Finally, parse the type field. */
278
279 if (end-pos > 1) {
Eric V. Smithd25cfe62012-01-19 20:04:28 -0500280 /* More than one char remain, invalid format specifier. */
281 PyErr_Format(PyExc_ValueError, "Invalid format specifier");
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200282 return 0;
283 }
284
285 if (end-pos == 1) {
286 format->type = READ_spec(pos);
287 ++pos;
288 }
289
290 /* Do as much validating as we can, just by looking at the format
291 specifier. Do not take into account what type of formatting
292 we're doing (int, float, string). */
293
294 if (format->thousands_separators) {
295 switch (format->type) {
296 case 'd':
297 case 'e':
298 case 'f':
299 case 'g':
300 case 'E':
301 case 'G':
302 case '%':
303 case 'F':
304 case '\0':
305 /* These are allowed. See PEP 378.*/
306 break;
Eric V. Smith89e1b1a2016-09-09 23:06:47 -0400307 case 'b':
308 case 'o':
309 case 'x':
310 case 'X':
311 /* Underscores are allowed in bin/oct/hex. See PEP 515. */
312 if (format->thousands_separators == LT_UNDERSCORE_LOCALE) {
313 /* Every four digits, not every three, in bin/oct/hex. */
314 format->thousands_separators = LT_UNDER_FOUR_LOCALE;
315 break;
316 }
Stefan Krahf432a322017-08-21 13:09:59 +0200317 /* fall through */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200318 default:
Benjamin Petersoncbda8fc2018-10-01 21:54:39 -0700319 invalid_thousands_separator_type(format->thousands_separators, format->type);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200320 return 0;
321 }
322 }
323
Victor Stinnera4ac6002012-01-21 15:50:49 +0100324 assert (format->align <= 127);
325 assert (format->sign <= 127);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200326 return 1;
327}
328
329/* Calculate the padding needed. */
330static void
331calc_padding(Py_ssize_t nchars, Py_ssize_t width, Py_UCS4 align,
332 Py_ssize_t *n_lpadding, Py_ssize_t *n_rpadding,
333 Py_ssize_t *n_total)
334{
335 if (width >= 0) {
336 if (nchars > width)
337 *n_total = nchars;
338 else
339 *n_total = width;
340 }
341 else {
342 /* not specified, use all of the chars and no more */
343 *n_total = nchars;
344 }
345
346 /* Figure out how much leading space we need, based on the
347 aligning */
348 if (align == '>')
349 *n_lpadding = *n_total - nchars;
350 else if (align == '^')
351 *n_lpadding = (*n_total - nchars) / 2;
352 else if (align == '<' || align == '=')
353 *n_lpadding = 0;
354 else {
355 /* We should never have an unspecified alignment. */
Barry Warsawb2e57942017-09-14 18:13:16 -0700356 Py_UNREACHABLE();
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200357 }
358
359 *n_rpadding = *n_total - nchars - *n_lpadding;
360}
361
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200362/* Do the padding, and return a pointer to where the caller-supplied
363 content goes. */
Victor Stinner9ce59bb2013-05-17 00:04:56 +0200364static int
Victor Stinnerd3f08822012-05-29 12:57:52 +0200365fill_padding(_PyUnicodeWriter *writer,
366 Py_ssize_t nchars,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200367 Py_UCS4 fill_char, Py_ssize_t n_lpadding,
368 Py_ssize_t n_rpadding)
369{
Victor Stinnerd3f08822012-05-29 12:57:52 +0200370 Py_ssize_t pos;
371
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200372 /* Pad on left. */
Victor Stinnerd3f08822012-05-29 12:57:52 +0200373 if (n_lpadding) {
374 pos = writer->pos;
375 _PyUnicode_FastFill(writer->buffer, pos, n_lpadding, fill_char);
376 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200377
378 /* Pad on right. */
Victor Stinnerd3f08822012-05-29 12:57:52 +0200379 if (n_rpadding) {
380 pos = writer->pos + nchars + n_lpadding;
381 _PyUnicode_FastFill(writer->buffer, pos, n_rpadding, fill_char);
382 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200383
384 /* Pointer to the user content. */
Victor Stinnerd3f08822012-05-29 12:57:52 +0200385 writer->pos += n_lpadding;
386 return 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200387}
388
389/************************************************************************/
390/*********** common routines for numeric formatting *********************/
391/************************************************************************/
392
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200393/* Locale info needed for formatting integers and the part of floats
394 before and including the decimal. Note that locales only support
395 8-bit chars, not unicode. */
396typedef struct {
Victor Stinner41a863c2012-02-24 00:37:51 +0100397 PyObject *decimal_point;
398 PyObject *thousands_sep;
399 const char *grouping;
Victor Stinner02e6bf72018-11-20 16:20:16 +0100400 char *grouping_buffer;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200401} LocaleInfo;
402
Victor Stinner02e6bf72018-11-20 16:20:16 +0100403#define LocaleInfo_STATIC_INIT {0, 0, 0, 0}
Victor Stinner41a863c2012-02-24 00:37:51 +0100404
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200405/* describes the layout for an integer, see the comment in
406 calc_number_widths() for details */
407typedef struct {
408 Py_ssize_t n_lpadding;
409 Py_ssize_t n_prefix;
410 Py_ssize_t n_spadding;
411 Py_ssize_t n_rpadding;
412 char sign;
413 Py_ssize_t n_sign; /* number of digits needed for sign (0/1) */
414 Py_ssize_t n_grouped_digits; /* Space taken up by the digits, including
415 any grouping chars. */
416 Py_ssize_t n_decimal; /* 0 if only an integer */
417 Py_ssize_t n_remainder; /* Digits in decimal and/or exponent part,
418 excluding the decimal itself, if
419 present. */
420
421 /* These 2 are not the widths of fields, but are needed by
422 STRINGLIB_GROUPING. */
423 Py_ssize_t n_digits; /* The number of digits before a decimal
424 or exponent. */
425 Py_ssize_t n_min_width; /* The min_width we used when we computed
426 the n_grouped_digits width. */
427} NumberFieldWidths;
428
429
430/* Given a number of the form:
431 digits[remainder]
432 where ptr points to the start and end points to the end, find where
433 the integer part ends. This could be a decimal, an exponent, both,
434 or neither.
435 If a decimal point is present, set *has_decimal and increment
436 remainder beyond it.
437 Results are undefined (but shouldn't crash) for improperly
438 formatted strings.
439*/
440static void
441parse_number(PyObject *s, Py_ssize_t pos, Py_ssize_t end,
442 Py_ssize_t *n_remainder, int *has_decimal)
443{
444 Py_ssize_t remainder;
Serhiy Storchaka1f932612016-08-29 15:57:26 +0300445 int kind = PyUnicode_KIND(s);
446 void *data = PyUnicode_DATA(s);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200447
Serhiy Storchaka1f932612016-08-29 15:57:26 +0300448 while (pos<end && Py_ISDIGIT(PyUnicode_READ(kind, data, pos)))
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200449 ++pos;
450 remainder = pos;
451
452 /* Does remainder start with a decimal point? */
Serhiy Storchaka1f932612016-08-29 15:57:26 +0300453 *has_decimal = pos<end && PyUnicode_READ(kind, data, remainder) == '.';
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200454
455 /* Skip the decimal point. */
456 if (*has_decimal)
457 remainder++;
458
459 *n_remainder = end - remainder;
460}
461
462/* not all fields of format are used. for example, precision is
463 unused. should this take discrete params in order to be more clear
464 about what it does? or is passing a single format parameter easier
Victor Stinner59423e32018-11-26 13:40:01 +0100465 and more efficient enough to justify a little obfuscation?
466 Return -1 on error. */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200467static Py_ssize_t
468calc_number_widths(NumberFieldWidths *spec, Py_ssize_t n_prefix,
469 Py_UCS4 sign_char, PyObject *number, Py_ssize_t n_start,
470 Py_ssize_t n_end, Py_ssize_t n_remainder,
471 int has_decimal, const LocaleInfo *locale,
Victor Stinner41a863c2012-02-24 00:37:51 +0100472 const InternalFormatSpec *format, Py_UCS4 *maxchar)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200473{
474 Py_ssize_t n_non_digit_non_padding;
475 Py_ssize_t n_padding;
476
477 spec->n_digits = n_end - n_start - n_remainder - (has_decimal?1:0);
478 spec->n_lpadding = 0;
479 spec->n_prefix = n_prefix;
Victor Stinner41a863c2012-02-24 00:37:51 +0100480 spec->n_decimal = has_decimal ? PyUnicode_GET_LENGTH(locale->decimal_point) : 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200481 spec->n_remainder = n_remainder;
482 spec->n_spadding = 0;
483 spec->n_rpadding = 0;
484 spec->sign = '\0';
485 spec->n_sign = 0;
486
487 /* the output will look like:
488 | |
489 | <lpadding> <sign> <prefix> <spadding> <grouped_digits> <decimal> <remainder> <rpadding> |
490 | |
491
492 sign is computed from format->sign and the actual
493 sign of the number
494
495 prefix is given (it's for the '0x' prefix)
496
497 digits is already known
498
499 the total width is either given, or computed from the
500 actual digits
501
502 only one of lpadding, spadding, and rpadding can be non-zero,
503 and it's calculated from the width and other fields
504 */
505
506 /* compute the various parts we're going to write */
507 switch (format->sign) {
508 case '+':
509 /* always put a + or - */
510 spec->n_sign = 1;
511 spec->sign = (sign_char == '-' ? '-' : '+');
512 break;
513 case ' ':
514 spec->n_sign = 1;
515 spec->sign = (sign_char == '-' ? '-' : ' ');
516 break;
517 default:
518 /* Not specified, or the default (-) */
519 if (sign_char == '-') {
520 spec->n_sign = 1;
521 spec->sign = '-';
522 }
523 }
524
525 /* The number of chars used for non-digits and non-padding. */
526 n_non_digit_non_padding = spec->n_sign + spec->n_prefix + spec->n_decimal +
527 spec->n_remainder;
528
529 /* min_width can go negative, that's okay. format->width == -1 means
530 we don't care. */
531 if (format->fill_char == '0' && format->align == '=')
532 spec->n_min_width = format->width - n_non_digit_non_padding;
533 else
534 spec->n_min_width = 0;
535
536 if (spec->n_digits == 0)
537 /* This case only occurs when using 'c' formatting, we need
538 to special case it because the grouping code always wants
539 to have at least one character. */
540 spec->n_grouped_digits = 0;
Victor Stinner41a863c2012-02-24 00:37:51 +0100541 else {
542 Py_UCS4 grouping_maxchar;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200543 spec->n_grouped_digits = _PyUnicode_InsertThousandsGrouping(
Victor Stinner41a863c2012-02-24 00:37:51 +0100544 NULL, 0,
Victor Stinner59423e32018-11-26 13:40:01 +0100545 NULL, 0, spec->n_digits,
546 spec->n_min_width,
Victor Stinner41a863c2012-02-24 00:37:51 +0100547 locale->grouping, locale->thousands_sep, &grouping_maxchar);
Victor Stinner59423e32018-11-26 13:40:01 +0100548 if (spec->n_grouped_digits == -1) {
549 return -1;
550 }
Victor Stinner41a863c2012-02-24 00:37:51 +0100551 *maxchar = Py_MAX(*maxchar, grouping_maxchar);
552 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200553
554 /* Given the desired width and the total of digit and non-digit
555 space we consume, see if we need any padding. format->width can
556 be negative (meaning no padding), but this code still works in
557 that case. */
558 n_padding = format->width -
559 (n_non_digit_non_padding + spec->n_grouped_digits);
560 if (n_padding > 0) {
561 /* Some padding is needed. Determine if it's left, space, or right. */
562 switch (format->align) {
563 case '<':
564 spec->n_rpadding = n_padding;
565 break;
566 case '^':
567 spec->n_lpadding = n_padding / 2;
568 spec->n_rpadding = n_padding - spec->n_lpadding;
569 break;
570 case '=':
571 spec->n_spadding = n_padding;
572 break;
573 case '>':
574 spec->n_lpadding = n_padding;
575 break;
576 default:
577 /* Shouldn't get here, but treat it as '>' */
Barry Warsawb2e57942017-09-14 18:13:16 -0700578 Py_UNREACHABLE();
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200579 }
580 }
Victor Stinner41a863c2012-02-24 00:37:51 +0100581
582 if (spec->n_lpadding || spec->n_spadding || spec->n_rpadding)
583 *maxchar = Py_MAX(*maxchar, format->fill_char);
584
Victor Stinner90f50d42012-02-24 01:44:47 +0100585 if (spec->n_decimal)
586 *maxchar = Py_MAX(*maxchar, PyUnicode_MAX_CHAR_VALUE(locale->decimal_point));
587
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200588 return spec->n_lpadding + spec->n_sign + spec->n_prefix +
589 spec->n_spadding + spec->n_grouped_digits + spec->n_decimal +
590 spec->n_remainder + spec->n_rpadding;
591}
592
593/* Fill in the digit parts of a numbers's string representation,
594 as determined in calc_number_widths().
Victor Stinnerafbaa202011-09-28 21:50:16 +0200595 Return -1 on error, or 0 on success. */
596static int
Victor Stinnerd3f08822012-05-29 12:57:52 +0200597fill_number(_PyUnicodeWriter *writer, const NumberFieldWidths *spec,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200598 PyObject *digits, Py_ssize_t d_start, Py_ssize_t d_end,
Victor Stinnerafbaa202011-09-28 21:50:16 +0200599 PyObject *prefix, Py_ssize_t p_start,
600 Py_UCS4 fill_char,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200601 LocaleInfo *locale, int toupper)
602{
603 /* Used to keep track of digits, decimal, and remainder. */
604 Py_ssize_t d_pos = d_start;
Victor Stinner22c103b2013-05-07 23:50:03 +0200605 const unsigned int kind = writer->kind;
Victor Stinnerd3f08822012-05-29 12:57:52 +0200606 const void *data = writer->data;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200607 Py_ssize_t r;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200608
609 if (spec->n_lpadding) {
Victor Stinnerd3f08822012-05-29 12:57:52 +0200610 _PyUnicode_FastFill(writer->buffer,
611 writer->pos, spec->n_lpadding, fill_char);
612 writer->pos += spec->n_lpadding;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200613 }
614 if (spec->n_sign == 1) {
Victor Stinnerd3f08822012-05-29 12:57:52 +0200615 PyUnicode_WRITE(kind, data, writer->pos, spec->sign);
616 writer->pos++;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200617 }
618 if (spec->n_prefix) {
Victor Stinnerd3f08822012-05-29 12:57:52 +0200619 _PyUnicode_FastCopyCharacters(writer->buffer, writer->pos,
620 prefix, p_start,
621 spec->n_prefix);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200622 if (toupper) {
623 Py_ssize_t t;
Benjamin Peterson21e0da22012-01-11 21:00:42 -0500624 for (t = 0; t < spec->n_prefix; t++) {
Victor Stinnerd3f08822012-05-29 12:57:52 +0200625 Py_UCS4 c = PyUnicode_READ(kind, data, writer->pos + t);
Victor Stinnered277852012-02-01 00:22:23 +0100626 c = Py_TOUPPER(c);
Victor Stinnera4ac6002012-01-21 15:50:49 +0100627 assert (c <= 127);
Victor Stinnerd3f08822012-05-29 12:57:52 +0200628 PyUnicode_WRITE(kind, data, writer->pos + t, c);
Benjamin Peterson21e0da22012-01-11 21:00:42 -0500629 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200630 }
Victor Stinnerd3f08822012-05-29 12:57:52 +0200631 writer->pos += spec->n_prefix;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200632 }
633 if (spec->n_spadding) {
Victor Stinnerd3f08822012-05-29 12:57:52 +0200634 _PyUnicode_FastFill(writer->buffer,
635 writer->pos, spec->n_spadding, fill_char);
636 writer->pos += spec->n_spadding;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200637 }
638
639 /* Only for type 'c' special case, it has no digits. */
640 if (spec->n_digits != 0) {
641 /* Fill the digits with InsertThousandsGrouping. */
Victor Stinner90f50d42012-02-24 01:44:47 +0100642 r = _PyUnicode_InsertThousandsGrouping(
Victor Stinner59423e32018-11-26 13:40:01 +0100643 writer, spec->n_grouped_digits,
644 digits, d_pos, spec->n_digits,
645 spec->n_min_width,
Victor Stinner41a863c2012-02-24 00:37:51 +0100646 locale->grouping, locale->thousands_sep, NULL);
Victor Stinner90f50d42012-02-24 01:44:47 +0100647 if (r == -1)
648 return -1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200649 assert(r == spec->n_grouped_digits);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200650 d_pos += spec->n_digits;
651 }
652 if (toupper) {
653 Py_ssize_t t;
Benjamin Peterson21e0da22012-01-11 21:00:42 -0500654 for (t = 0; t < spec->n_grouped_digits; t++) {
Victor Stinnerd3f08822012-05-29 12:57:52 +0200655 Py_UCS4 c = PyUnicode_READ(kind, data, writer->pos + t);
Victor Stinnered277852012-02-01 00:22:23 +0100656 c = Py_TOUPPER(c);
Benjamin Peterson21e0da22012-01-11 21:00:42 -0500657 if (c > 127) {
658 PyErr_SetString(PyExc_SystemError, "non-ascii grouped digit");
659 return -1;
660 }
Victor Stinnerd3f08822012-05-29 12:57:52 +0200661 PyUnicode_WRITE(kind, data, writer->pos + t, c);
Benjamin Peterson21e0da22012-01-11 21:00:42 -0500662 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200663 }
Victor Stinnerd3f08822012-05-29 12:57:52 +0200664 writer->pos += spec->n_grouped_digits;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200665
666 if (spec->n_decimal) {
Victor Stinnerd3f08822012-05-29 12:57:52 +0200667 _PyUnicode_FastCopyCharacters(
668 writer->buffer, writer->pos,
669 locale->decimal_point, 0, spec->n_decimal);
670 writer->pos += spec->n_decimal;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200671 d_pos += 1;
672 }
673
674 if (spec->n_remainder) {
Victor Stinnerd3f08822012-05-29 12:57:52 +0200675 _PyUnicode_FastCopyCharacters(
676 writer->buffer, writer->pos,
677 digits, d_pos, spec->n_remainder);
678 writer->pos += spec->n_remainder;
Brett Cannon8a250fa2012-06-25 16:13:44 -0400679 /* d_pos += spec->n_remainder; */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200680 }
681
682 if (spec->n_rpadding) {
Victor Stinnerd3f08822012-05-29 12:57:52 +0200683 _PyUnicode_FastFill(writer->buffer,
684 writer->pos, spec->n_rpadding,
685 fill_char);
686 writer->pos += spec->n_rpadding;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200687 }
Victor Stinnerafbaa202011-09-28 21:50:16 +0200688 return 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200689}
690
/* Grouping description used when no thousands separators are wanted:
   its only element is CHAR_MAX. */
static const char no_grouping[] = { CHAR_MAX };
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200692
693/* Find the decimal point character(s?), thousands_separator(s?), and
694 grouping description, either for the current locale if type is
Eric V. Smith89e1b1a2016-09-09 23:06:47 -0400695 LT_CURRENT_LOCALE, a hard-coded locale if LT_DEFAULT_LOCALE or
696 LT_UNDERSCORE_LOCALE/LT_UNDER_FOUR_LOCALE, or none if LT_NO_LOCALE. */
Victor Stinner41a863c2012-02-24 00:37:51 +0100697static int
Benjamin Peterson59e5e0d2016-09-13 22:43:45 -0700698get_locale_info(enum LocaleType type, LocaleInfo *locale_info)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200699{
700 switch (type) {
701 case LT_CURRENT_LOCALE: {
Victor Stinner02e6bf72018-11-20 16:20:16 +0100702 struct lconv *lc = localeconv();
703 if (_Py_GetLocaleconvNumeric(lc,
704 &locale_info->decimal_point,
705 &locale_info->thousands_sep) < 0) {
Victor Stinner41a863c2012-02-24 00:37:51 +0100706 return -1;
Victor Stinnercb064fc2018-01-15 15:58:02 +0100707 }
Victor Stinner02e6bf72018-11-20 16:20:16 +0100708
709 /* localeconv() grouping can become a dangling pointer or point
710 to a different string if another thread calls localeconv() during
711 the string formatting. Copy the string to avoid this risk. */
712 locale_info->grouping_buffer = _PyMem_Strdup(lc->grouping);
713 if (locale_info->grouping_buffer == NULL) {
714 PyErr_NoMemory();
715 return -1;
716 }
717 locale_info->grouping = locale_info->grouping_buffer;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200718 break;
719 }
720 case LT_DEFAULT_LOCALE:
Eric V. Smith89e1b1a2016-09-09 23:06:47 -0400721 case LT_UNDERSCORE_LOCALE:
722 case LT_UNDER_FOUR_LOCALE:
Victor Stinner41a863c2012-02-24 00:37:51 +0100723 locale_info->decimal_point = PyUnicode_FromOrdinal('.');
Eric V. Smith89e1b1a2016-09-09 23:06:47 -0400724 locale_info->thousands_sep = PyUnicode_FromOrdinal(
725 type == LT_DEFAULT_LOCALE ? ',' : '_');
Benjamin Peterson59e5e0d2016-09-13 22:43:45 -0700726 if (!locale_info->decimal_point || !locale_info->thousands_sep)
Victor Stinner41a863c2012-02-24 00:37:51 +0100727 return -1;
Eric V. Smith89e1b1a2016-09-09 23:06:47 -0400728 if (type != LT_UNDER_FOUR_LOCALE)
729 locale_info->grouping = "\3"; /* Group every 3 characters. The
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200730 (implicit) trailing 0 means repeat
731 infinitely. */
Eric V. Smith89e1b1a2016-09-09 23:06:47 -0400732 else
733 locale_info->grouping = "\4"; /* Bin/oct/hex group every four. */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200734 break;
735 case LT_NO_LOCALE:
Victor Stinner41a863c2012-02-24 00:37:51 +0100736 locale_info->decimal_point = PyUnicode_FromOrdinal('.');
737 locale_info->thousands_sep = PyUnicode_New(0, 0);
Benjamin Peterson59e5e0d2016-09-13 22:43:45 -0700738 if (!locale_info->decimal_point || !locale_info->thousands_sep)
Victor Stinner41a863c2012-02-24 00:37:51 +0100739 return -1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200740 locale_info->grouping = no_grouping;
741 break;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200742 }
Victor Stinner41a863c2012-02-24 00:37:51 +0100743 return 0;
744}
745
746static void
747free_locale_info(LocaleInfo *locale_info)
748{
749 Py_XDECREF(locale_info->decimal_point);
750 Py_XDECREF(locale_info->thousands_sep);
Victor Stinner02e6bf72018-11-20 16:20:16 +0100751 PyMem_Free(locale_info->grouping_buffer);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200752}
753
754/************************************************************************/
755/*********** string formatting ******************************************/
756/************************************************************************/
757
Victor Stinnerd3f08822012-05-29 12:57:52 +0200758static int
759format_string_internal(PyObject *value, const InternalFormatSpec *format,
760 _PyUnicodeWriter *writer)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200761{
762 Py_ssize_t lpad;
763 Py_ssize_t rpad;
764 Py_ssize_t total;
Victor Stinnerd3f08822012-05-29 12:57:52 +0200765 Py_ssize_t len;
766 int result = -1;
Victor Stinnerece58de2012-04-23 23:36:38 +0200767 Py_UCS4 maxchar;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200768
Victor Stinnerd3f08822012-05-29 12:57:52 +0200769 assert(PyUnicode_IS_READY(value));
770 len = PyUnicode_GET_LENGTH(value);
771
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200772 /* sign is not allowed on strings */
773 if (format->sign != '\0') {
774 PyErr_SetString(PyExc_ValueError,
775 "Sign not allowed in string format specifier");
776 goto done;
777 }
778
779 /* alternate is not allowed on strings */
780 if (format->alternate) {
781 PyErr_SetString(PyExc_ValueError,
782 "Alternate form (#) not allowed in string format "
783 "specifier");
784 goto done;
785 }
786
787 /* '=' alignment not allowed on strings */
788 if (format->align == '=') {
789 PyErr_SetString(PyExc_ValueError,
790 "'=' alignment not allowed "
791 "in string format specifier");
792 goto done;
793 }
794
Victor Stinner621ef3d2012-10-02 00:33:47 +0200795 if ((format->width == -1 || format->width <= len)
796 && (format->precision == -1 || format->precision >= len)) {
Victor Stinnerd3f08822012-05-29 12:57:52 +0200797 /* Fast path */
798 return _PyUnicodeWriter_WriteStr(writer, value);
799 }
800
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200801 /* if precision is specified, output no more that format.precision
802 characters */
803 if (format->precision >= 0 && len >= format->precision) {
804 len = format->precision;
805 }
806
807 calc_padding(len, format->width, format->align, &lpad, &rpad, &total);
808
Victor Stinnereb4b5ac2013-04-03 02:02:33 +0200809 maxchar = writer->maxchar;
Victor Stinnera4ac6002012-01-21 15:50:49 +0100810 if (lpad != 0 || rpad != 0)
811 maxchar = Py_MAX(maxchar, format->fill_char);
Victor Stinnereb4b5ac2013-04-03 02:02:33 +0200812 if (PyUnicode_MAX_CHAR_VALUE(value) > maxchar) {
813 Py_UCS4 valmaxchar = _PyUnicode_FindMaxChar(value, 0, len);
814 maxchar = Py_MAX(maxchar, valmaxchar);
815 }
Victor Stinnera4ac6002012-01-21 15:50:49 +0100816
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200817 /* allocate the resulting string */
Victor Stinnerd3f08822012-05-29 12:57:52 +0200818 if (_PyUnicodeWriter_Prepare(writer, total, maxchar) == -1)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200819 goto done;
820
821 /* Write into that space. First the padding. */
Eric V. Smith2ea97122014-04-14 11:55:10 -0400822 result = fill_padding(writer, len, format->fill_char, lpad, rpad);
Victor Stinnerd3f08822012-05-29 12:57:52 +0200823 if (result == -1)
824 goto done;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200825
826 /* Then the source string. */
Victor Stinnerc9d369f2012-06-16 02:22:37 +0200827 if (len) {
828 _PyUnicode_FastCopyCharacters(writer->buffer, writer->pos,
829 value, 0, len);
830 }
Victor Stinnerd3f08822012-05-29 12:57:52 +0200831 writer->pos += (len + rpad);
832 result = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200833
834done:
835 return result;
836}
837
838
839/************************************************************************/
840/*********** long formatting ********************************************/
841/************************************************************************/
842
Victor Stinnerd3f08822012-05-29 12:57:52 +0200843static int
844format_long_internal(PyObject *value, const InternalFormatSpec *format,
845 _PyUnicodeWriter *writer)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200846{
Victor Stinnerd3f08822012-05-29 12:57:52 +0200847 int result = -1;
Amaury Forgeot d'Arccd27df32012-01-23 22:42:19 +0100848 Py_UCS4 maxchar = 127;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200849 PyObject *tmp = NULL;
850 Py_ssize_t inumeric_chars;
851 Py_UCS4 sign_char = '\0';
852 Py_ssize_t n_digits; /* count of digits need from the computed
853 string */
854 Py_ssize_t n_remainder = 0; /* Used only for 'c' formatting, which
855 produces non-digits */
856 Py_ssize_t n_prefix = 0; /* Count of prefix chars, (e.g., '0x') */
857 Py_ssize_t n_total;
Victor Stinnered277852012-02-01 00:22:23 +0100858 Py_ssize_t prefix = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200859 NumberFieldWidths spec;
860 long x;
861
862 /* Locale settings, either from the actual locale or
863 from a hard-code pseudo-locale */
Victor Stinner02e6bf72018-11-20 16:20:16 +0100864 LocaleInfo locale = LocaleInfo_STATIC_INIT;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200865
866 /* no precision allowed on integers */
867 if (format->precision != -1) {
868 PyErr_SetString(PyExc_ValueError,
869 "Precision not allowed in integer format specifier");
870 goto done;
871 }
872
873 /* special case for character formatting */
874 if (format->type == 'c') {
875 /* error to specify a sign */
876 if (format->sign != '\0') {
877 PyErr_SetString(PyExc_ValueError,
878 "Sign not allowed with integer"
879 " format specifier 'c'");
880 goto done;
881 }
Eric V. Smitha12572f2014-04-15 22:37:55 -0400882 /* error to request alternate format */
883 if (format->alternate) {
884 PyErr_SetString(PyExc_ValueError,
885 "Alternate form (#) not allowed with integer"
886 " format specifier 'c'");
887 goto done;
888 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200889
890 /* taken from unicodeobject.c formatchar() */
891 /* Integer input truncated to a character */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200892 x = PyLong_AsLong(value);
893 if (x == -1 && PyErr_Occurred())
894 goto done;
895 if (x < 0 || x > 0x10ffff) {
896 PyErr_SetString(PyExc_OverflowError,
Victor Stinnera4ac6002012-01-21 15:50:49 +0100897 "%c arg not in range(0x110000)");
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200898 goto done;
899 }
900 tmp = PyUnicode_FromOrdinal(x);
901 inumeric_chars = 0;
902 n_digits = 1;
Amaury Forgeot d'Arc6d766fc2012-01-23 23:20:43 +0100903 maxchar = Py_MAX(maxchar, (Py_UCS4)x);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200904
905 /* As a sort-of hack, we tell calc_number_widths that we only
906 have "remainder" characters. calc_number_widths thinks
907 these are characters that don't get formatted, only copied
908 into the output string. We do this for 'c' formatting,
909 because the characters are likely to be non-digits. */
910 n_remainder = 1;
911 }
912 else {
913 int base;
914 int leading_chars_to_skip = 0; /* Number of characters added by
915 PyNumber_ToBase that we want to
916 skip over. */
917
918 /* Compute the base and how many characters will be added by
919 PyNumber_ToBase */
920 switch (format->type) {
921 case 'b':
922 base = 2;
923 leading_chars_to_skip = 2; /* 0b */
924 break;
925 case 'o':
926 base = 8;
927 leading_chars_to_skip = 2; /* 0o */
928 break;
929 case 'x':
930 case 'X':
931 base = 16;
932 leading_chars_to_skip = 2; /* 0x */
933 break;
934 default: /* shouldn't be needed, but stops a compiler warning */
935 case 'd':
936 case 'n':
937 base = 10;
938 break;
939 }
940
Victor Stinnerd3f08822012-05-29 12:57:52 +0200941 if (format->sign != '+' && format->sign != ' '
942 && format->width == -1
943 && format->type != 'X' && format->type != 'n'
944 && !format->thousands_separators
945 && PyLong_CheckExact(value))
946 {
947 /* Fast path */
948 return _PyLong_FormatWriter(writer, value, base, format->alternate);
949 }
950
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200951 /* The number of prefix chars is the same as the leading
952 chars to skip */
953 if (format->alternate)
954 n_prefix = leading_chars_to_skip;
955
956 /* Do the hard part, converting to a string in a given base */
Victor Stinnerd3f08822012-05-29 12:57:52 +0200957 tmp = _PyLong_Format(value, base);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200958 if (tmp == NULL || PyUnicode_READY(tmp) == -1)
959 goto done;
960
961 inumeric_chars = 0;
962 n_digits = PyUnicode_GET_LENGTH(tmp);
963
964 prefix = inumeric_chars;
965
966 /* Is a sign character present in the output? If so, remember it
967 and skip it */
968 if (PyUnicode_READ_CHAR(tmp, inumeric_chars) == '-') {
969 sign_char = '-';
970 ++prefix;
971 ++leading_chars_to_skip;
972 }
973
974 /* Skip over the leading chars (0x, 0b, etc.) */
975 n_digits -= leading_chars_to_skip;
976 inumeric_chars += leading_chars_to_skip;
977 }
978
979 /* Determine the grouping, separator, and decimal point, if any. */
Victor Stinner41a863c2012-02-24 00:37:51 +0100980 if (get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
Eric V. Smith89e1b1a2016-09-09 23:06:47 -0400981 format->thousands_separators,
Victor Stinner41a863c2012-02-24 00:37:51 +0100982 &locale) == -1)
983 goto done;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200984
985 /* Calculate how much memory we'll need. */
986 n_total = calc_number_widths(&spec, n_prefix, sign_char, tmp, inumeric_chars,
Victor Stinner41a863c2012-02-24 00:37:51 +0100987 inumeric_chars + n_digits, n_remainder, 0,
988 &locale, format, &maxchar);
Victor Stinner59423e32018-11-26 13:40:01 +0100989 if (n_total == -1) {
990 goto done;
991 }
Victor Stinnera4ac6002012-01-21 15:50:49 +0100992
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200993 /* Allocate the memory. */
Victor Stinnerd3f08822012-05-29 12:57:52 +0200994 if (_PyUnicodeWriter_Prepare(writer, n_total, maxchar) == -1)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200995 goto done;
996
997 /* Populate the memory. */
Victor Stinnerd3f08822012-05-29 12:57:52 +0200998 result = fill_number(writer, &spec,
999 tmp, inumeric_chars, inumeric_chars + n_digits,
Eric V. Smith2ea97122014-04-14 11:55:10 -04001000 tmp, prefix, format->fill_char,
Victor Stinnerd3f08822012-05-29 12:57:52 +02001001 &locale, format->type == 'X');
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001002
1003done:
1004 Py_XDECREF(tmp);
Victor Stinner41a863c2012-02-24 00:37:51 +01001005 free_locale_info(&locale);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001006 return result;
1007}
1008
1009/************************************************************************/
1010/*********** float formatting *******************************************/
1011/************************************************************************/
1012
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001013/* much of this is taken from unicodeobject.c */
Victor Stinnerd3f08822012-05-29 12:57:52 +02001014static int
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001015format_float_internal(PyObject *value,
Victor Stinnerd3f08822012-05-29 12:57:52 +02001016 const InternalFormatSpec *format,
1017 _PyUnicodeWriter *writer)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001018{
1019 char *buf = NULL; /* buffer returned from PyOS_double_to_string */
1020 Py_ssize_t n_digits;
1021 Py_ssize_t n_remainder;
1022 Py_ssize_t n_total;
1023 int has_decimal;
1024 double val;
Victor Stinner76d38502013-06-24 23:34:15 +02001025 int precision, default_precision = 6;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001026 Py_UCS4 type = format->type;
1027 int add_pct = 0;
1028 Py_ssize_t index;
1029 NumberFieldWidths spec;
1030 int flags = 0;
Victor Stinnerd3f08822012-05-29 12:57:52 +02001031 int result = -1;
Amaury Forgeot d'Arccd27df32012-01-23 22:42:19 +01001032 Py_UCS4 maxchar = 127;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001033 Py_UCS4 sign_char = '\0';
1034 int float_type; /* Used to see if we have a nan, inf, or regular float. */
1035 PyObject *unicode_tmp = NULL;
1036
1037 /* Locale settings, either from the actual locale or
1038 from a hard-code pseudo-locale */
Victor Stinner02e6bf72018-11-20 16:20:16 +01001039 LocaleInfo locale = LocaleInfo_STATIC_INIT;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001040
Victor Stinner2f084ec2013-06-23 14:54:30 +02001041 if (format->precision > INT_MAX) {
1042 PyErr_SetString(PyExc_ValueError, "precision too big");
1043 goto done;
1044 }
1045 precision = (int)format->precision;
1046
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001047 if (format->alternate)
1048 flags |= Py_DTSF_ALT;
1049
1050 if (type == '\0') {
1051 /* Omitted type specifier. Behaves in the same way as repr(x)
1052 and str(x) if no precision is given, else like 'g', but with
1053 at least one digit after the decimal point. */
1054 flags |= Py_DTSF_ADD_DOT_0;
1055 type = 'r';
1056 default_precision = 0;
1057 }
1058
1059 if (type == 'n')
1060 /* 'n' is the same as 'g', except for the locale used to
1061 format the result. We take care of that later. */
1062 type = 'g';
1063
1064 val = PyFloat_AsDouble(value);
1065 if (val == -1.0 && PyErr_Occurred())
1066 goto done;
1067
1068 if (type == '%') {
1069 type = 'f';
1070 val *= 100;
1071 add_pct = 1;
1072 }
1073
1074 if (precision < 0)
1075 precision = default_precision;
1076 else if (type == 'r')
1077 type = 'g';
1078
Martin Panter4c359642016-05-08 13:53:41 +00001079 /* Cast "type", because if we're in unicode we need to pass an
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001080 8-bit char. This is safe, because we've restricted what "type"
1081 can be. */
1082 buf = PyOS_double_to_string(val, (char)type, precision, flags,
1083 &float_type);
1084 if (buf == NULL)
1085 goto done;
1086 n_digits = strlen(buf);
1087
1088 if (add_pct) {
1089 /* We know that buf has a trailing zero (since we just called
1090 strlen() on it), and we don't use that fact any more. So we
1091 can just write over the trailing zero. */
1092 buf[n_digits] = '%';
1093 n_digits += 1;
1094 }
1095
Victor Stinnerd3f08822012-05-29 12:57:52 +02001096 if (format->sign != '+' && format->sign != ' '
1097 && format->width == -1
1098 && format->type != 'n'
1099 && !format->thousands_separators)
1100 {
1101 /* Fast path */
Victor Stinner4a587072013-11-19 12:54:53 +01001102 result = _PyUnicodeWriter_WriteASCIIString(writer, buf, n_digits);
1103 PyMem_Free(buf);
Victor Stinnerd3f08822012-05-29 12:57:52 +02001104 return result;
1105 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001106
Victor Stinner4a587072013-11-19 12:54:53 +01001107 /* Since there is no unicode version of PyOS_double_to_string,
1108 just use the 8 bit version and then convert to unicode. */
1109 unicode_tmp = _PyUnicode_FromASCII(buf, n_digits);
1110 PyMem_Free(buf);
1111 if (unicode_tmp == NULL)
1112 goto done;
1113
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001114 /* Is a sign character present in the output? If so, remember it
1115 and skip it */
Victor Stinnerd3f08822012-05-29 12:57:52 +02001116 index = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001117 if (PyUnicode_READ_CHAR(unicode_tmp, index) == '-') {
1118 sign_char = '-';
1119 ++index;
1120 --n_digits;
1121 }
1122
1123 /* Determine if we have any "remainder" (after the digits, might include
1124 decimal or exponent or both (or neither)) */
1125 parse_number(unicode_tmp, index, index + n_digits, &n_remainder, &has_decimal);
1126
1127 /* Determine the grouping, separator, and decimal point, if any. */
Victor Stinner41a863c2012-02-24 00:37:51 +01001128 if (get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
Eric V. Smith89e1b1a2016-09-09 23:06:47 -04001129 format->thousands_separators,
Victor Stinner41a863c2012-02-24 00:37:51 +01001130 &locale) == -1)
1131 goto done;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001132
1133 /* Calculate how much memory we'll need. */
Victor Stinnerafbaa202011-09-28 21:50:16 +02001134 n_total = calc_number_widths(&spec, 0, sign_char, unicode_tmp, index,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001135 index + n_digits, n_remainder, has_decimal,
Victor Stinner41a863c2012-02-24 00:37:51 +01001136 &locale, format, &maxchar);
Victor Stinner59423e32018-11-26 13:40:01 +01001137 if (n_total == -1) {
1138 goto done;
1139 }
Victor Stinnera4ac6002012-01-21 15:50:49 +01001140
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001141 /* Allocate the memory. */
Victor Stinnerd3f08822012-05-29 12:57:52 +02001142 if (_PyUnicodeWriter_Prepare(writer, n_total, maxchar) == -1)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001143 goto done;
1144
1145 /* Populate the memory. */
Victor Stinnerd3f08822012-05-29 12:57:52 +02001146 result = fill_number(writer, &spec,
1147 unicode_tmp, index, index + n_digits,
Eric V. Smith2ea97122014-04-14 11:55:10 -04001148 NULL, 0, format->fill_char,
Victor Stinnerd3f08822012-05-29 12:57:52 +02001149 &locale, 0);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001150
1151done:
Stefan Krahd9c1bf72012-09-06 13:02:46 +02001152 Py_XDECREF(unicode_tmp);
Victor Stinner41a863c2012-02-24 00:37:51 +01001153 free_locale_info(&locale);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001154 return result;
1155}
1156
1157/************************************************************************/
1158/*********** complex formatting *****************************************/
1159/************************************************************************/
1160
Victor Stinnerd3f08822012-05-29 12:57:52 +02001161static int
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001162format_complex_internal(PyObject *value,
Victor Stinnerd3f08822012-05-29 12:57:52 +02001163 const InternalFormatSpec *format,
1164 _PyUnicodeWriter *writer)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001165{
1166 double re;
1167 double im;
1168 char *re_buf = NULL; /* buffer returned from PyOS_double_to_string */
1169 char *im_buf = NULL; /* buffer returned from PyOS_double_to_string */
1170
1171 InternalFormatSpec tmp_format = *format;
1172 Py_ssize_t n_re_digits;
1173 Py_ssize_t n_im_digits;
1174 Py_ssize_t n_re_remainder;
1175 Py_ssize_t n_im_remainder;
1176 Py_ssize_t n_re_total;
1177 Py_ssize_t n_im_total;
1178 int re_has_decimal;
1179 int im_has_decimal;
Victor Stinner76d38502013-06-24 23:34:15 +02001180 int precision, default_precision = 6;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001181 Py_UCS4 type = format->type;
1182 Py_ssize_t i_re;
1183 Py_ssize_t i_im;
1184 NumberFieldWidths re_spec;
1185 NumberFieldWidths im_spec;
1186 int flags = 0;
Victor Stinnerd3f08822012-05-29 12:57:52 +02001187 int result = -1;
Amaury Forgeot d'Arccd27df32012-01-23 22:42:19 +01001188 Py_UCS4 maxchar = 127;
Victor Stinnerd3f08822012-05-29 12:57:52 +02001189 enum PyUnicode_Kind rkind;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001190 void *rdata;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001191 Py_UCS4 re_sign_char = '\0';
1192 Py_UCS4 im_sign_char = '\0';
1193 int re_float_type; /* Used to see if we have a nan, inf, or regular float. */
1194 int im_float_type;
1195 int add_parens = 0;
1196 int skip_re = 0;
1197 Py_ssize_t lpad;
1198 Py_ssize_t rpad;
1199 Py_ssize_t total;
1200 PyObject *re_unicode_tmp = NULL;
1201 PyObject *im_unicode_tmp = NULL;
1202
1203 /* Locale settings, either from the actual locale or
1204 from a hard-code pseudo-locale */
Victor Stinner02e6bf72018-11-20 16:20:16 +01001205 LocaleInfo locale = LocaleInfo_STATIC_INIT;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001206
Victor Stinner2f084ec2013-06-23 14:54:30 +02001207 if (format->precision > INT_MAX) {
1208 PyErr_SetString(PyExc_ValueError, "precision too big");
1209 goto done;
1210 }
1211 precision = (int)format->precision;
1212
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001213 /* Zero padding is not allowed. */
1214 if (format->fill_char == '0') {
1215 PyErr_SetString(PyExc_ValueError,
1216 "Zero padding is not allowed in complex format "
1217 "specifier");
1218 goto done;
1219 }
1220
1221 /* Neither is '=' alignment . */
1222 if (format->align == '=') {
1223 PyErr_SetString(PyExc_ValueError,
1224 "'=' alignment flag is not allowed in complex format "
1225 "specifier");
1226 goto done;
1227 }
1228
1229 re = PyComplex_RealAsDouble(value);
1230 if (re == -1.0 && PyErr_Occurred())
1231 goto done;
1232 im = PyComplex_ImagAsDouble(value);
1233 if (im == -1.0 && PyErr_Occurred())
1234 goto done;
1235
1236 if (format->alternate)
1237 flags |= Py_DTSF_ALT;
1238
1239 if (type == '\0') {
1240 /* Omitted type specifier. Should be like str(self). */
1241 type = 'r';
1242 default_precision = 0;
1243 if (re == 0.0 && copysign(1.0, re) == 1.0)
1244 skip_re = 1;
1245 else
1246 add_parens = 1;
1247 }
1248
1249 if (type == 'n')
1250 /* 'n' is the same as 'g', except for the locale used to
1251 format the result. We take care of that later. */
1252 type = 'g';
1253
1254 if (precision < 0)
1255 precision = default_precision;
1256 else if (type == 'r')
1257 type = 'g';
1258
Martin Panter4c359642016-05-08 13:53:41 +00001259 /* Cast "type", because if we're in unicode we need to pass an
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001260 8-bit char. This is safe, because we've restricted what "type"
1261 can be. */
1262 re_buf = PyOS_double_to_string(re, (char)type, precision, flags,
1263 &re_float_type);
1264 if (re_buf == NULL)
1265 goto done;
1266 im_buf = PyOS_double_to_string(im, (char)type, precision, flags,
1267 &im_float_type);
1268 if (im_buf == NULL)
1269 goto done;
1270
1271 n_re_digits = strlen(re_buf);
1272 n_im_digits = strlen(im_buf);
1273
1274 /* Since there is no unicode version of PyOS_double_to_string,
1275 just use the 8 bit version and then convert to unicode. */
Victor Stinnerd3f08822012-05-29 12:57:52 +02001276 re_unicode_tmp = _PyUnicode_FromASCII(re_buf, n_re_digits);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001277 if (re_unicode_tmp == NULL)
1278 goto done;
1279 i_re = 0;
1280
Victor Stinnerd3f08822012-05-29 12:57:52 +02001281 im_unicode_tmp = _PyUnicode_FromASCII(im_buf, n_im_digits);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001282 if (im_unicode_tmp == NULL)
1283 goto done;
1284 i_im = 0;
1285
1286 /* Is a sign character present in the output? If so, remember it
1287 and skip it */
1288 if (PyUnicode_READ_CHAR(re_unicode_tmp, i_re) == '-') {
1289 re_sign_char = '-';
1290 ++i_re;
1291 --n_re_digits;
1292 }
1293 if (PyUnicode_READ_CHAR(im_unicode_tmp, i_im) == '-') {
1294 im_sign_char = '-';
1295 ++i_im;
1296 --n_im_digits;
1297 }
1298
1299 /* Determine if we have any "remainder" (after the digits, might include
1300 decimal or exponent or both (or neither)) */
Victor Stinnerafbaa202011-09-28 21:50:16 +02001301 parse_number(re_unicode_tmp, i_re, i_re + n_re_digits,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001302 &n_re_remainder, &re_has_decimal);
Victor Stinnerafbaa202011-09-28 21:50:16 +02001303 parse_number(im_unicode_tmp, i_im, i_im + n_im_digits,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001304 &n_im_remainder, &im_has_decimal);
1305
1306 /* Determine the grouping, separator, and decimal point, if any. */
Victor Stinner41a863c2012-02-24 00:37:51 +01001307 if (get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
Eric V. Smith89e1b1a2016-09-09 23:06:47 -04001308 format->thousands_separators,
Victor Stinner41a863c2012-02-24 00:37:51 +01001309 &locale) == -1)
1310 goto done;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001311
1312 /* Turn off any padding. We'll do it later after we've composed
1313 the numbers without padding. */
1314 tmp_format.fill_char = '\0';
1315 tmp_format.align = '<';
1316 tmp_format.width = -1;
1317
1318 /* Calculate how much memory we'll need. */
1319 n_re_total = calc_number_widths(&re_spec, 0, re_sign_char, re_unicode_tmp,
1320 i_re, i_re + n_re_digits, n_re_remainder,
Victor Stinner41a863c2012-02-24 00:37:51 +01001321 re_has_decimal, &locale, &tmp_format,
1322 &maxchar);
Victor Stinner59423e32018-11-26 13:40:01 +01001323 if (n_re_total == -1) {
1324 goto done;
1325 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001326
1327 /* Same formatting, but always include a sign, unless the real part is
1328 * going to be omitted, in which case we use whatever sign convention was
1329 * requested by the original format. */
1330 if (!skip_re)
1331 tmp_format.sign = '+';
1332 n_im_total = calc_number_widths(&im_spec, 0, im_sign_char, im_unicode_tmp,
1333 i_im, i_im + n_im_digits, n_im_remainder,
Victor Stinner41a863c2012-02-24 00:37:51 +01001334 im_has_decimal, &locale, &tmp_format,
1335 &maxchar);
Victor Stinner59423e32018-11-26 13:40:01 +01001336 if (n_im_total == -1) {
1337 goto done;
1338 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001339
1340 if (skip_re)
1341 n_re_total = 0;
1342
1343 /* Add 1 for the 'j', and optionally 2 for parens. */
1344 calc_padding(n_re_total + n_im_total + 1 + add_parens * 2,
1345 format->width, format->align, &lpad, &rpad, &total);
1346
Victor Stinner41a863c2012-02-24 00:37:51 +01001347 if (lpad || rpad)
Victor Stinnera4ac6002012-01-21 15:50:49 +01001348 maxchar = Py_MAX(maxchar, format->fill_char);
1349
Victor Stinnerd3f08822012-05-29 12:57:52 +02001350 if (_PyUnicodeWriter_Prepare(writer, total, maxchar) == -1)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001351 goto done;
Victor Stinnerd3f08822012-05-29 12:57:52 +02001352 rkind = writer->kind;
1353 rdata = writer->data;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001354
1355 /* Populate the memory. First, the padding. */
Victor Stinnerd3f08822012-05-29 12:57:52 +02001356 result = fill_padding(writer,
1357 n_re_total + n_im_total + 1 + add_parens * 2,
Eric V. Smith2ea97122014-04-14 11:55:10 -04001358 format->fill_char, lpad, rpad);
Victor Stinnerd3f08822012-05-29 12:57:52 +02001359 if (result == -1)
1360 goto done;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001361
Victor Stinnerd3f08822012-05-29 12:57:52 +02001362 if (add_parens) {
1363 PyUnicode_WRITE(rkind, rdata, writer->pos, '(');
1364 writer->pos++;
1365 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001366
1367 if (!skip_re) {
Victor Stinnerd3f08822012-05-29 12:57:52 +02001368 result = fill_number(writer, &re_spec,
1369 re_unicode_tmp, i_re, i_re + n_re_digits,
1370 NULL, 0,
1371 0,
1372 &locale, 0);
1373 if (result == -1)
Victor Stinnerafbaa202011-09-28 21:50:16 +02001374 goto done;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001375 }
Victor Stinnerd3f08822012-05-29 12:57:52 +02001376 result = fill_number(writer, &im_spec,
1377 im_unicode_tmp, i_im, i_im + n_im_digits,
1378 NULL, 0,
1379 0,
1380 &locale, 0);
1381 if (result == -1)
Victor Stinnerafbaa202011-09-28 21:50:16 +02001382 goto done;
Victor Stinnerd3f08822012-05-29 12:57:52 +02001383 PyUnicode_WRITE(rkind, rdata, writer->pos, 'j');
1384 writer->pos++;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001385
Victor Stinnerd3f08822012-05-29 12:57:52 +02001386 if (add_parens) {
1387 PyUnicode_WRITE(rkind, rdata, writer->pos, ')');
1388 writer->pos++;
1389 }
1390
1391 writer->pos += rpad;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001392
1393done:
1394 PyMem_Free(re_buf);
1395 PyMem_Free(im_buf);
1396 Py_XDECREF(re_unicode_tmp);
1397 Py_XDECREF(im_unicode_tmp);
Victor Stinner41a863c2012-02-24 00:37:51 +01001398 free_locale_info(&locale);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001399 return result;
1400}
1401
1402/************************************************************************/
1403/*********** built in formatters ****************************************/
1404/************************************************************************/
doko@ubuntu.com39378f72012-06-21 12:12:20 +02001405static int
Victor Stinnerd3f08822012-05-29 12:57:52 +02001406format_obj(PyObject *obj, _PyUnicodeWriter *writer)
1407{
1408 PyObject *str;
1409 int err;
1410
1411 str = PyObject_Str(obj);
1412 if (str == NULL)
1413 return -1;
1414 err = _PyUnicodeWriter_WriteStr(writer, str);
1415 Py_DECREF(str);
1416 return err;
1417}
1418
1419int
1420_PyUnicode_FormatAdvancedWriter(_PyUnicodeWriter *writer,
1421 PyObject *obj,
1422 PyObject *format_spec,
1423 Py_ssize_t start, Py_ssize_t end)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001424{
1425 InternalFormatSpec format;
Victor Stinnerd3f08822012-05-29 12:57:52 +02001426
1427 assert(PyUnicode_Check(obj));
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001428
1429 /* check for the special case of zero length format spec, make
1430 it equivalent to str(obj) */
Victor Stinnerd3f08822012-05-29 12:57:52 +02001431 if (start == end) {
1432 if (PyUnicode_CheckExact(obj))
1433 return _PyUnicodeWriter_WriteStr(writer, obj);
1434 else
1435 return format_obj(obj, writer);
1436 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001437
1438 /* parse the format_spec */
1439 if (!parse_internal_render_format_spec(format_spec, start, end,
1440 &format, 's', '<'))
Victor Stinnerd3f08822012-05-29 12:57:52 +02001441 return -1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001442
1443 /* type conversion? */
1444 switch (format.type) {
1445 case 's':
1446 /* no type conversion needed, already a string. do the formatting */
Victor Stinnerd3f08822012-05-29 12:57:52 +02001447 return format_string_internal(obj, &format, writer);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001448 default:
1449 /* unknown */
1450 unknown_presentation_type(format.type, obj->ob_type->tp_name);
Victor Stinnerd3f08822012-05-29 12:57:52 +02001451 return -1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001452 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001453}
1454
Victor Stinnerd3f08822012-05-29 12:57:52 +02001455int
1456_PyLong_FormatAdvancedWriter(_PyUnicodeWriter *writer,
1457 PyObject *obj,
1458 PyObject *format_spec,
1459 Py_ssize_t start, Py_ssize_t end)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001460{
Victor Stinnerd3f08822012-05-29 12:57:52 +02001461 PyObject *tmp = NULL, *str = NULL;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001462 InternalFormatSpec format;
Victor Stinnerd3f08822012-05-29 12:57:52 +02001463 int result = -1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001464
1465 /* check for the special case of zero length format spec, make
1466 it equivalent to str(obj) */
1467 if (start == end) {
Victor Stinnerd3f08822012-05-29 12:57:52 +02001468 if (PyLong_CheckExact(obj))
1469 return _PyLong_FormatWriter(writer, obj, 10, 0);
1470 else
1471 return format_obj(obj, writer);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001472 }
1473
1474 /* parse the format_spec */
1475 if (!parse_internal_render_format_spec(format_spec, start, end,
1476 &format, 'd', '>'))
1477 goto done;
1478
1479 /* type conversion? */
1480 switch (format.type) {
1481 case 'b':
1482 case 'c':
1483 case 'd':
1484 case 'o':
1485 case 'x':
1486 case 'X':
1487 case 'n':
Serhiy Storchaka95949422013-08-27 19:40:23 +03001488 /* no type conversion needed, already an int. do the formatting */
Victor Stinnerd3f08822012-05-29 12:57:52 +02001489 result = format_long_internal(obj, &format, writer);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001490 break;
1491
1492 case 'e':
1493 case 'E':
1494 case 'f':
1495 case 'F':
1496 case 'g':
1497 case 'G':
1498 case '%':
1499 /* convert to float */
1500 tmp = PyNumber_Float(obj);
1501 if (tmp == NULL)
1502 goto done;
Victor Stinnerd3f08822012-05-29 12:57:52 +02001503 result = format_float_internal(tmp, &format, writer);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001504 break;
1505
1506 default:
1507 /* unknown */
1508 unknown_presentation_type(format.type, obj->ob_type->tp_name);
1509 goto done;
1510 }
1511
1512done:
1513 Py_XDECREF(tmp);
Victor Stinnerd3f08822012-05-29 12:57:52 +02001514 Py_XDECREF(str);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001515 return result;
1516}
1517
Victor Stinnerd3f08822012-05-29 12:57:52 +02001518int
1519_PyFloat_FormatAdvancedWriter(_PyUnicodeWriter *writer,
1520 PyObject *obj,
1521 PyObject *format_spec,
1522 Py_ssize_t start, Py_ssize_t end)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001523{
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001524 InternalFormatSpec format;
1525
1526 /* check for the special case of zero length format spec, make
1527 it equivalent to str(obj) */
Victor Stinnerd3f08822012-05-29 12:57:52 +02001528 if (start == end)
1529 return format_obj(obj, writer);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001530
1531 /* parse the format_spec */
1532 if (!parse_internal_render_format_spec(format_spec, start, end,
1533 &format, '\0', '>'))
Victor Stinnerd3f08822012-05-29 12:57:52 +02001534 return -1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001535
1536 /* type conversion? */
1537 switch (format.type) {
1538 case '\0': /* No format code: like 'g', but with at least one decimal. */
1539 case 'e':
1540 case 'E':
1541 case 'f':
1542 case 'F':
1543 case 'g':
1544 case 'G':
1545 case 'n':
1546 case '%':
1547 /* no conversion, already a float. do the formatting */
Victor Stinnerd3f08822012-05-29 12:57:52 +02001548 return format_float_internal(obj, &format, writer);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001549
1550 default:
1551 /* unknown */
1552 unknown_presentation_type(format.type, obj->ob_type->tp_name);
Victor Stinnerd3f08822012-05-29 12:57:52 +02001553 return -1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001554 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001555}
1556
Victor Stinnerd3f08822012-05-29 12:57:52 +02001557int
1558_PyComplex_FormatAdvancedWriter(_PyUnicodeWriter *writer,
1559 PyObject *obj,
1560 PyObject *format_spec,
1561 Py_ssize_t start, Py_ssize_t end)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001562{
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001563 InternalFormatSpec format;
1564
1565 /* check for the special case of zero length format spec, make
1566 it equivalent to str(obj) */
Victor Stinnerd3f08822012-05-29 12:57:52 +02001567 if (start == end)
1568 return format_obj(obj, writer);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001569
1570 /* parse the format_spec */
1571 if (!parse_internal_render_format_spec(format_spec, start, end,
1572 &format, '\0', '>'))
Victor Stinnerd3f08822012-05-29 12:57:52 +02001573 return -1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001574
1575 /* type conversion? */
1576 switch (format.type) {
1577 case '\0': /* No format code: like 'g', but with at least one decimal. */
1578 case 'e':
1579 case 'E':
1580 case 'f':
1581 case 'F':
1582 case 'g':
1583 case 'G':
1584 case 'n':
1585 /* no conversion, already a complex. do the formatting */
Victor Stinnerd3f08822012-05-29 12:57:52 +02001586 return format_complex_internal(obj, &format, writer);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001587
1588 default:
1589 /* unknown */
1590 unknown_presentation_type(format.type, obj->ob_type->tp_name);
Victor Stinnerd3f08822012-05-29 12:57:52 +02001591 return -1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001592 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001593}