blob: e12ba49bd2c15a8b5d5173cbcb2512172abf238a [file] [log] [blame]
/* Implements the unicode (as opposed to string) version of the
   built-in formatters for string, int, float.  That is, the versions
   of int.__format__, etc., that take and return unicode objects. */
4
5#include "Python.h"
Victor Stinner02e6bf72018-11-20 16:20:16 +01006#include "pycore_fileutils.h"
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02007#include <locale.h>
8
9/* Raises an exception about an unknown presentation type for this
10 * type. */
11
12static void
13unknown_presentation_type(Py_UCS4 presentation_type,
14 const char* type_name)
15{
16 /* %c might be out-of-range, hence the two cases. */
17 if (presentation_type > 32 && presentation_type < 128)
18 PyErr_Format(PyExc_ValueError,
19 "Unknown format code '%c' "
20 "for object of type '%.200s'",
21 (char)presentation_type,
22 type_name);
23 else
24 PyErr_Format(PyExc_ValueError,
25 "Unknown format code '\\x%x' "
26 "for object of type '%.200s'",
27 (unsigned int)presentation_type,
28 type_name);
29}
30
31static void
Benjamin Petersoncbda8fc2018-10-01 21:54:39 -070032invalid_thousands_separator_type(char specifier, Py_UCS4 presentation_type)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020033{
Benjamin Petersoncbda8fc2018-10-01 21:54:39 -070034 assert(specifier == ',' || specifier == '_');
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020035 if (presentation_type > 32 && presentation_type < 128)
36 PyErr_Format(PyExc_ValueError,
Benjamin Petersoncbda8fc2018-10-01 21:54:39 -070037 "Cannot specify '%c' with '%c'.",
38 specifier, (char)presentation_type);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020039 else
40 PyErr_Format(PyExc_ValueError,
Benjamin Petersoncbda8fc2018-10-01 21:54:39 -070041 "Cannot specify '%c' with '\\x%x'.",
42 specifier, (unsigned int)presentation_type);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020043}
44
Eric V. Smith89e1b1a2016-09-09 23:06:47 -040045static void
Benjamin Petersoneb0dfa92016-09-09 20:14:05 -070046invalid_comma_and_underscore(void)
Eric V. Smith89e1b1a2016-09-09 23:06:47 -040047{
48 PyErr_Format(PyExc_ValueError, "Cannot specify both ',' and '_'.");
49}
50
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020051/*
52 get_integer consumes 0 or more decimal digit characters from an
53 input string, updates *result with the corresponding positive
54 integer, and returns the number of digits consumed.
55
56 returns -1 on error.
57*/
58static int
Serhiy Storchaka1f932612016-08-29 15:57:26 +030059get_integer(PyObject *str, Py_ssize_t *ppos, Py_ssize_t end,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020060 Py_ssize_t *result)
61{
Serhiy Storchaka1f932612016-08-29 15:57:26 +030062 Py_ssize_t accumulator, digitval, pos = *ppos;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020063 int numdigits;
Serhiy Storchaka1f932612016-08-29 15:57:26 +030064 int kind = PyUnicode_KIND(str);
65 void *data = PyUnicode_DATA(str);
66
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020067 accumulator = numdigits = 0;
Serhiy Storchaka1f932612016-08-29 15:57:26 +030068 for (; pos < end; pos++, numdigits++) {
69 digitval = Py_UNICODE_TODECIMAL(PyUnicode_READ(kind, data, pos));
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020070 if (digitval < 0)
71 break;
72 /*
Mark Dickinson47862d42011-12-01 15:27:04 +000073 Detect possible overflow before it happens:
74
75 accumulator * 10 + digitval > PY_SSIZE_T_MAX if and only if
76 accumulator > (PY_SSIZE_T_MAX - digitval) / 10.
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020077 */
Mark Dickinson47862d42011-12-01 15:27:04 +000078 if (accumulator > (PY_SSIZE_T_MAX - digitval) / 10) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020079 PyErr_Format(PyExc_ValueError,
80 "Too many decimal digits in format string");
Serhiy Storchaka1f932612016-08-29 15:57:26 +030081 *ppos = pos;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020082 return -1;
83 }
Mark Dickinson47862d42011-12-01 15:27:04 +000084 accumulator = accumulator * 10 + digitval;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020085 }
Serhiy Storchaka1f932612016-08-29 15:57:26 +030086 *ppos = pos;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020087 *result = accumulator;
88 return numdigits;
89}
90
91/************************************************************************/
92/*********** standard format specifier parsing **************************/
93/************************************************************************/
94
95/* returns true if this character is a specifier alignment token */
96Py_LOCAL_INLINE(int)
97is_alignment_token(Py_UCS4 c)
98{
99 switch (c) {
100 case '<': case '>': case '=': case '^':
101 return 1;
102 default:
103 return 0;
104 }
105}
106
107/* returns true if this character is a sign element */
108Py_LOCAL_INLINE(int)
109is_sign_element(Py_UCS4 c)
110{
111 switch (c) {
112 case ' ': case '+': case '-':
113 return 1;
114 default:
115 return 0;
116 }
117}
Eric Smith8c663262007-08-25 02:26:07 +0000118
/* Locale type codes: where the decimal point, thousands separator and
   grouping come from.  LT_NO_LOCALE must be zero, because the value is
   also tested for truth.  The two separator-based codes are equal to
   the separator character itself. */
enum LocaleType {
    /* No thousands separators. */
    LT_NO_LOCALE = 0,
    /* ',' separator. */
    LT_DEFAULT_LOCALE = ',',
    /* '_' separator. */
    LT_UNDERSCORE_LOCALE = '_',
    /* '_' separator with groups of four (bin/oct/hex). */
    LT_UNDER_FOUR_LOCALE,
    /* Whatever the current C locale specifies. */
    LT_CURRENT_LOCALE
};
Eric Smith4a7d76d2008-05-30 18:10:19 +0000127
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200128typedef struct {
129 Py_UCS4 fill_char;
130 Py_UCS4 align;
131 int alternate;
132 Py_UCS4 sign;
133 Py_ssize_t width;
Benjamin Peterson995026a2016-09-13 22:46:15 -0700134 enum LocaleType thousands_separators;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200135 Py_ssize_t precision;
136 Py_UCS4 type;
137} InternalFormatSpec;
Eric Smith4a7d76d2008-05-30 18:10:19 +0000138
#if 0
/* Debugging aid: dump every field of a parsed format spec to stdout.
   Compiled out by default; switch the #if to 1 to use it. */
static void
DEBUG_PRINT_FORMAT_SPEC(InternalFormatSpec *format)
{
    const InternalFormatSpec *f = format;

    printf("internal format spec: fill_char %d\n", f->fill_char);
    printf("internal format spec: align %d\n", f->align);
    printf("internal format spec: alternate %d\n", f->alternate);
    printf("internal format spec: sign %d\n", f->sign);
    printf("internal format spec: width %zd\n", f->width);
    printf("internal format spec: thousands_separators %d\n",
           f->thousands_separators);
    printf("internal format spec: precision %zd\n", f->precision);
    printf("internal format spec: type %c\n", f->type);
    printf("\n");
}
#endif
156
157
158/*
159 ptr points to the start of the format_spec, end points just past its end.
160 fills in format with the parsed information.
161 returns 1 on success, 0 on failure.
162 if failure, sets the exception
163*/
164static int
165parse_internal_render_format_spec(PyObject *format_spec,
166 Py_ssize_t start, Py_ssize_t end,
167 InternalFormatSpec *format,
168 char default_type,
169 char default_align)
170{
171 Py_ssize_t pos = start;
Serhiy Storchaka1f932612016-08-29 15:57:26 +0300172 int kind = PyUnicode_KIND(format_spec);
173 void *data = PyUnicode_DATA(format_spec);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200174 /* end-pos is used throughout this code to specify the length of
175 the input string */
Serhiy Storchaka1f932612016-08-29 15:57:26 +0300176#define READ_spec(index) PyUnicode_READ(kind, data, index)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200177
178 Py_ssize_t consumed;
179 int align_specified = 0;
Eric V. Smith2ea97122014-04-14 11:55:10 -0400180 int fill_char_specified = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200181
Eric V. Smith2ea97122014-04-14 11:55:10 -0400182 format->fill_char = ' ';
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200183 format->align = default_align;
184 format->alternate = 0;
185 format->sign = '\0';
186 format->width = -1;
Benjamin Peterson995026a2016-09-13 22:46:15 -0700187 format->thousands_separators = LT_NO_LOCALE;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200188 format->precision = -1;
189 format->type = default_type;
190
191 /* If the second char is an alignment token,
192 then parse the fill char */
193 if (end-pos >= 2 && is_alignment_token(READ_spec(pos+1))) {
194 format->align = READ_spec(pos+1);
195 format->fill_char = READ_spec(pos);
Eric V. Smith2ea97122014-04-14 11:55:10 -0400196 fill_char_specified = 1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200197 align_specified = 1;
198 pos += 2;
199 }
200 else if (end-pos >= 1 && is_alignment_token(READ_spec(pos))) {
201 format->align = READ_spec(pos);
202 align_specified = 1;
203 ++pos;
204 }
205
206 /* Parse the various sign options */
207 if (end-pos >= 1 && is_sign_element(READ_spec(pos))) {
208 format->sign = READ_spec(pos);
209 ++pos;
210 }
211
212 /* If the next character is #, we're in alternate mode. This only
213 applies to integers. */
214 if (end-pos >= 1 && READ_spec(pos) == '#') {
215 format->alternate = 1;
216 ++pos;
217 }
218
219 /* The special case for 0-padding (backwards compat) */
Eric V. Smith2ea97122014-04-14 11:55:10 -0400220 if (!fill_char_specified && end-pos >= 1 && READ_spec(pos) == '0') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200221 format->fill_char = '0';
222 if (!align_specified) {
223 format->align = '=';
224 }
225 ++pos;
226 }
227
228 consumed = get_integer(format_spec, &pos, end, &format->width);
229 if (consumed == -1)
230 /* Overflow error. Exception already set. */
231 return 0;
232
233 /* If consumed is 0, we didn't consume any characters for the
234 width. In that case, reset the width to -1, because
235 get_integer() will have set it to zero. -1 is how we record
236 that the width wasn't specified. */
237 if (consumed == 0)
238 format->width = -1;
239
240 /* Comma signifies add thousands separators */
241 if (end-pos && READ_spec(pos) == ',') {
Eric V. Smith89e1b1a2016-09-09 23:06:47 -0400242 format->thousands_separators = LT_DEFAULT_LOCALE;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200243 ++pos;
244 }
Eric V. Smith89e1b1a2016-09-09 23:06:47 -0400245 /* Underscore signifies add thousands separators */
246 if (end-pos && READ_spec(pos) == '_') {
Benjamin Peterson995026a2016-09-13 22:46:15 -0700247 if (format->thousands_separators != LT_NO_LOCALE) {
Eric V. Smith89e1b1a2016-09-09 23:06:47 -0400248 invalid_comma_and_underscore();
249 return 0;
250 }
251 format->thousands_separators = LT_UNDERSCORE_LOCALE;
252 ++pos;
253 }
254 if (end-pos && READ_spec(pos) == ',') {
255 invalid_comma_and_underscore();
256 return 0;
257 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200258
259 /* Parse field precision */
260 if (end-pos && READ_spec(pos) == '.') {
261 ++pos;
262
263 consumed = get_integer(format_spec, &pos, end, &format->precision);
264 if (consumed == -1)
265 /* Overflow error. Exception already set. */
266 return 0;
267
268 /* Not having a precision after a dot is an error. */
269 if (consumed == 0) {
270 PyErr_Format(PyExc_ValueError,
271 "Format specifier missing precision");
272 return 0;
273 }
274
275 }
276
277 /* Finally, parse the type field. */
278
279 if (end-pos > 1) {
Eric V. Smithd25cfe62012-01-19 20:04:28 -0500280 /* More than one char remain, invalid format specifier. */
281 PyErr_Format(PyExc_ValueError, "Invalid format specifier");
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200282 return 0;
283 }
284
285 if (end-pos == 1) {
286 format->type = READ_spec(pos);
287 ++pos;
288 }
289
290 /* Do as much validating as we can, just by looking at the format
291 specifier. Do not take into account what type of formatting
292 we're doing (int, float, string). */
293
294 if (format->thousands_separators) {
295 switch (format->type) {
296 case 'd':
297 case 'e':
298 case 'f':
299 case 'g':
300 case 'E':
301 case 'G':
302 case '%':
303 case 'F':
304 case '\0':
305 /* These are allowed. See PEP 378.*/
306 break;
Eric V. Smith89e1b1a2016-09-09 23:06:47 -0400307 case 'b':
308 case 'o':
309 case 'x':
310 case 'X':
311 /* Underscores are allowed in bin/oct/hex. See PEP 515. */
312 if (format->thousands_separators == LT_UNDERSCORE_LOCALE) {
313 /* Every four digits, not every three, in bin/oct/hex. */
314 format->thousands_separators = LT_UNDER_FOUR_LOCALE;
315 break;
316 }
Stefan Krahf432a322017-08-21 13:09:59 +0200317 /* fall through */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200318 default:
Benjamin Petersoncbda8fc2018-10-01 21:54:39 -0700319 invalid_thousands_separator_type(format->thousands_separators, format->type);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200320 return 0;
321 }
322 }
323
Victor Stinnera4ac6002012-01-21 15:50:49 +0100324 assert (format->align <= 127);
325 assert (format->sign <= 127);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200326 return 1;
327}
328
329/* Calculate the padding needed. */
330static void
331calc_padding(Py_ssize_t nchars, Py_ssize_t width, Py_UCS4 align,
332 Py_ssize_t *n_lpadding, Py_ssize_t *n_rpadding,
333 Py_ssize_t *n_total)
334{
335 if (width >= 0) {
336 if (nchars > width)
337 *n_total = nchars;
338 else
339 *n_total = width;
340 }
341 else {
342 /* not specified, use all of the chars and no more */
343 *n_total = nchars;
344 }
345
346 /* Figure out how much leading space we need, based on the
347 aligning */
348 if (align == '>')
349 *n_lpadding = *n_total - nchars;
350 else if (align == '^')
351 *n_lpadding = (*n_total - nchars) / 2;
352 else if (align == '<' || align == '=')
353 *n_lpadding = 0;
354 else {
355 /* We should never have an unspecified alignment. */
Barry Warsawb2e57942017-09-14 18:13:16 -0700356 Py_UNREACHABLE();
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200357 }
358
359 *n_rpadding = *n_total - nchars - *n_lpadding;
360}
361
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200362/* Do the padding, and return a pointer to where the caller-supplied
363 content goes. */
Victor Stinner9ce59bb2013-05-17 00:04:56 +0200364static int
Victor Stinnerd3f08822012-05-29 12:57:52 +0200365fill_padding(_PyUnicodeWriter *writer,
366 Py_ssize_t nchars,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200367 Py_UCS4 fill_char, Py_ssize_t n_lpadding,
368 Py_ssize_t n_rpadding)
369{
Victor Stinnerd3f08822012-05-29 12:57:52 +0200370 Py_ssize_t pos;
371
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200372 /* Pad on left. */
Victor Stinnerd3f08822012-05-29 12:57:52 +0200373 if (n_lpadding) {
374 pos = writer->pos;
375 _PyUnicode_FastFill(writer->buffer, pos, n_lpadding, fill_char);
376 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200377
378 /* Pad on right. */
Victor Stinnerd3f08822012-05-29 12:57:52 +0200379 if (n_rpadding) {
380 pos = writer->pos + nchars + n_lpadding;
381 _PyUnicode_FastFill(writer->buffer, pos, n_rpadding, fill_char);
382 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200383
384 /* Pointer to the user content. */
Victor Stinnerd3f08822012-05-29 12:57:52 +0200385 writer->pos += n_lpadding;
386 return 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200387}
388
389/************************************************************************/
390/*********** common routines for numeric formatting *********************/
391/************************************************************************/
392
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200393/* Locale info needed for formatting integers and the part of floats
394 before and including the decimal. Note that locales only support
395 8-bit chars, not unicode. */
396typedef struct {
Victor Stinner41a863c2012-02-24 00:37:51 +0100397 PyObject *decimal_point;
398 PyObject *thousands_sep;
399 const char *grouping;
Victor Stinner02e6bf72018-11-20 16:20:16 +0100400 char *grouping_buffer;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200401} LocaleInfo;
402
Victor Stinner02e6bf72018-11-20 16:20:16 +0100403#define LocaleInfo_STATIC_INIT {0, 0, 0, 0}
Victor Stinner41a863c2012-02-24 00:37:51 +0100404
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200405/* describes the layout for an integer, see the comment in
406 calc_number_widths() for details */
407typedef struct {
408 Py_ssize_t n_lpadding;
409 Py_ssize_t n_prefix;
410 Py_ssize_t n_spadding;
411 Py_ssize_t n_rpadding;
412 char sign;
413 Py_ssize_t n_sign; /* number of digits needed for sign (0/1) */
414 Py_ssize_t n_grouped_digits; /* Space taken up by the digits, including
415 any grouping chars. */
416 Py_ssize_t n_decimal; /* 0 if only an integer */
417 Py_ssize_t n_remainder; /* Digits in decimal and/or exponent part,
418 excluding the decimal itself, if
419 present. */
420
421 /* These 2 are not the widths of fields, but are needed by
422 STRINGLIB_GROUPING. */
423 Py_ssize_t n_digits; /* The number of digits before a decimal
424 or exponent. */
425 Py_ssize_t n_min_width; /* The min_width we used when we computed
426 the n_grouped_digits width. */
427} NumberFieldWidths;
428
429
430/* Given a number of the form:
431 digits[remainder]
432 where ptr points to the start and end points to the end, find where
433 the integer part ends. This could be a decimal, an exponent, both,
434 or neither.
435 If a decimal point is present, set *has_decimal and increment
436 remainder beyond it.
437 Results are undefined (but shouldn't crash) for improperly
438 formatted strings.
439*/
440static void
441parse_number(PyObject *s, Py_ssize_t pos, Py_ssize_t end,
442 Py_ssize_t *n_remainder, int *has_decimal)
443{
444 Py_ssize_t remainder;
Serhiy Storchaka1f932612016-08-29 15:57:26 +0300445 int kind = PyUnicode_KIND(s);
446 void *data = PyUnicode_DATA(s);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200447
Serhiy Storchaka1f932612016-08-29 15:57:26 +0300448 while (pos<end && Py_ISDIGIT(PyUnicode_READ(kind, data, pos)))
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200449 ++pos;
450 remainder = pos;
451
452 /* Does remainder start with a decimal point? */
Serhiy Storchaka1f932612016-08-29 15:57:26 +0300453 *has_decimal = pos<end && PyUnicode_READ(kind, data, remainder) == '.';
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200454
455 /* Skip the decimal point. */
456 if (*has_decimal)
457 remainder++;
458
459 *n_remainder = end - remainder;
460}
461
462/* not all fields of format are used. for example, precision is
463 unused. should this take discrete params in order to be more clear
464 about what it does? or is passing a single format parameter easier
465 and more efficient enough to justify a little obfuscation? */
466static Py_ssize_t
467calc_number_widths(NumberFieldWidths *spec, Py_ssize_t n_prefix,
468 Py_UCS4 sign_char, PyObject *number, Py_ssize_t n_start,
469 Py_ssize_t n_end, Py_ssize_t n_remainder,
470 int has_decimal, const LocaleInfo *locale,
Victor Stinner41a863c2012-02-24 00:37:51 +0100471 const InternalFormatSpec *format, Py_UCS4 *maxchar)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200472{
473 Py_ssize_t n_non_digit_non_padding;
474 Py_ssize_t n_padding;
475
476 spec->n_digits = n_end - n_start - n_remainder - (has_decimal?1:0);
477 spec->n_lpadding = 0;
478 spec->n_prefix = n_prefix;
Victor Stinner41a863c2012-02-24 00:37:51 +0100479 spec->n_decimal = has_decimal ? PyUnicode_GET_LENGTH(locale->decimal_point) : 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200480 spec->n_remainder = n_remainder;
481 spec->n_spadding = 0;
482 spec->n_rpadding = 0;
483 spec->sign = '\0';
484 spec->n_sign = 0;
485
486 /* the output will look like:
487 | |
488 | <lpadding> <sign> <prefix> <spadding> <grouped_digits> <decimal> <remainder> <rpadding> |
489 | |
490
491 sign is computed from format->sign and the actual
492 sign of the number
493
494 prefix is given (it's for the '0x' prefix)
495
496 digits is already known
497
498 the total width is either given, or computed from the
499 actual digits
500
501 only one of lpadding, spadding, and rpadding can be non-zero,
502 and it's calculated from the width and other fields
503 */
504
505 /* compute the various parts we're going to write */
506 switch (format->sign) {
507 case '+':
508 /* always put a + or - */
509 spec->n_sign = 1;
510 spec->sign = (sign_char == '-' ? '-' : '+');
511 break;
512 case ' ':
513 spec->n_sign = 1;
514 spec->sign = (sign_char == '-' ? '-' : ' ');
515 break;
516 default:
517 /* Not specified, or the default (-) */
518 if (sign_char == '-') {
519 spec->n_sign = 1;
520 spec->sign = '-';
521 }
522 }
523
524 /* The number of chars used for non-digits and non-padding. */
525 n_non_digit_non_padding = spec->n_sign + spec->n_prefix + spec->n_decimal +
526 spec->n_remainder;
527
528 /* min_width can go negative, that's okay. format->width == -1 means
529 we don't care. */
530 if (format->fill_char == '0' && format->align == '=')
531 spec->n_min_width = format->width - n_non_digit_non_padding;
532 else
533 spec->n_min_width = 0;
534
535 if (spec->n_digits == 0)
536 /* This case only occurs when using 'c' formatting, we need
537 to special case it because the grouping code always wants
538 to have at least one character. */
539 spec->n_grouped_digits = 0;
Victor Stinner41a863c2012-02-24 00:37:51 +0100540 else {
541 Py_UCS4 grouping_maxchar;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200542 spec->n_grouped_digits = _PyUnicode_InsertThousandsGrouping(
Victor Stinner41a863c2012-02-24 00:37:51 +0100543 NULL, 0,
544 0, NULL,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200545 spec->n_digits, spec->n_min_width,
Victor Stinner41a863c2012-02-24 00:37:51 +0100546 locale->grouping, locale->thousands_sep, &grouping_maxchar);
547 *maxchar = Py_MAX(*maxchar, grouping_maxchar);
548 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200549
550 /* Given the desired width and the total of digit and non-digit
551 space we consume, see if we need any padding. format->width can
552 be negative (meaning no padding), but this code still works in
553 that case. */
554 n_padding = format->width -
555 (n_non_digit_non_padding + spec->n_grouped_digits);
556 if (n_padding > 0) {
557 /* Some padding is needed. Determine if it's left, space, or right. */
558 switch (format->align) {
559 case '<':
560 spec->n_rpadding = n_padding;
561 break;
562 case '^':
563 spec->n_lpadding = n_padding / 2;
564 spec->n_rpadding = n_padding - spec->n_lpadding;
565 break;
566 case '=':
567 spec->n_spadding = n_padding;
568 break;
569 case '>':
570 spec->n_lpadding = n_padding;
571 break;
572 default:
573 /* Shouldn't get here, but treat it as '>' */
Barry Warsawb2e57942017-09-14 18:13:16 -0700574 Py_UNREACHABLE();
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200575 }
576 }
Victor Stinner41a863c2012-02-24 00:37:51 +0100577
578 if (spec->n_lpadding || spec->n_spadding || spec->n_rpadding)
579 *maxchar = Py_MAX(*maxchar, format->fill_char);
580
Victor Stinner90f50d42012-02-24 01:44:47 +0100581 if (spec->n_decimal)
582 *maxchar = Py_MAX(*maxchar, PyUnicode_MAX_CHAR_VALUE(locale->decimal_point));
583
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200584 return spec->n_lpadding + spec->n_sign + spec->n_prefix +
585 spec->n_spadding + spec->n_grouped_digits + spec->n_decimal +
586 spec->n_remainder + spec->n_rpadding;
587}
588
589/* Fill in the digit parts of a numbers's string representation,
590 as determined in calc_number_widths().
Victor Stinnerafbaa202011-09-28 21:50:16 +0200591 Return -1 on error, or 0 on success. */
592static int
Victor Stinnerd3f08822012-05-29 12:57:52 +0200593fill_number(_PyUnicodeWriter *writer, const NumberFieldWidths *spec,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200594 PyObject *digits, Py_ssize_t d_start, Py_ssize_t d_end,
Victor Stinnerafbaa202011-09-28 21:50:16 +0200595 PyObject *prefix, Py_ssize_t p_start,
596 Py_UCS4 fill_char,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200597 LocaleInfo *locale, int toupper)
598{
599 /* Used to keep track of digits, decimal, and remainder. */
600 Py_ssize_t d_pos = d_start;
Victor Stinner22c103b2013-05-07 23:50:03 +0200601 const unsigned int kind = writer->kind;
Victor Stinnerd3f08822012-05-29 12:57:52 +0200602 const void *data = writer->data;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200603 Py_ssize_t r;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200604
605 if (spec->n_lpadding) {
Victor Stinnerd3f08822012-05-29 12:57:52 +0200606 _PyUnicode_FastFill(writer->buffer,
607 writer->pos, spec->n_lpadding, fill_char);
608 writer->pos += spec->n_lpadding;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200609 }
610 if (spec->n_sign == 1) {
Victor Stinnerd3f08822012-05-29 12:57:52 +0200611 PyUnicode_WRITE(kind, data, writer->pos, spec->sign);
612 writer->pos++;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200613 }
614 if (spec->n_prefix) {
Victor Stinnerd3f08822012-05-29 12:57:52 +0200615 _PyUnicode_FastCopyCharacters(writer->buffer, writer->pos,
616 prefix, p_start,
617 spec->n_prefix);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200618 if (toupper) {
619 Py_ssize_t t;
Benjamin Peterson21e0da22012-01-11 21:00:42 -0500620 for (t = 0; t < spec->n_prefix; t++) {
Victor Stinnerd3f08822012-05-29 12:57:52 +0200621 Py_UCS4 c = PyUnicode_READ(kind, data, writer->pos + t);
Victor Stinnered277852012-02-01 00:22:23 +0100622 c = Py_TOUPPER(c);
Victor Stinnera4ac6002012-01-21 15:50:49 +0100623 assert (c <= 127);
Victor Stinnerd3f08822012-05-29 12:57:52 +0200624 PyUnicode_WRITE(kind, data, writer->pos + t, c);
Benjamin Peterson21e0da22012-01-11 21:00:42 -0500625 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200626 }
Victor Stinnerd3f08822012-05-29 12:57:52 +0200627 writer->pos += spec->n_prefix;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200628 }
629 if (spec->n_spadding) {
Victor Stinnerd3f08822012-05-29 12:57:52 +0200630 _PyUnicode_FastFill(writer->buffer,
631 writer->pos, spec->n_spadding, fill_char);
632 writer->pos += spec->n_spadding;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200633 }
634
635 /* Only for type 'c' special case, it has no digits. */
636 if (spec->n_digits != 0) {
637 /* Fill the digits with InsertThousandsGrouping. */
Victor Stinnerdba2dee2011-09-28 21:50:42 +0200638 char *pdigits;
639 if (PyUnicode_READY(digits))
640 return -1;
641 pdigits = PyUnicode_DATA(digits);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200642 if (PyUnicode_KIND(digits) < kind) {
643 pdigits = _PyUnicode_AsKind(digits, kind);
Victor Stinnerafbaa202011-09-28 21:50:16 +0200644 if (pdigits == NULL)
645 return -1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200646 }
Victor Stinner90f50d42012-02-24 01:44:47 +0100647 r = _PyUnicode_InsertThousandsGrouping(
Victor Stinnerd3f08822012-05-29 12:57:52 +0200648 writer->buffer, writer->pos,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200649 spec->n_grouped_digits,
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200650 pdigits + kind * d_pos,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200651 spec->n_digits, spec->n_min_width,
Victor Stinner41a863c2012-02-24 00:37:51 +0100652 locale->grouping, locale->thousands_sep, NULL);
Victor Stinner90f50d42012-02-24 01:44:47 +0100653 if (r == -1)
654 return -1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200655 assert(r == spec->n_grouped_digits);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200656 if (PyUnicode_KIND(digits) < kind)
657 PyMem_Free(pdigits);
658 d_pos += spec->n_digits;
659 }
660 if (toupper) {
661 Py_ssize_t t;
Benjamin Peterson21e0da22012-01-11 21:00:42 -0500662 for (t = 0; t < spec->n_grouped_digits; t++) {
Victor Stinnerd3f08822012-05-29 12:57:52 +0200663 Py_UCS4 c = PyUnicode_READ(kind, data, writer->pos + t);
Victor Stinnered277852012-02-01 00:22:23 +0100664 c = Py_TOUPPER(c);
Benjamin Peterson21e0da22012-01-11 21:00:42 -0500665 if (c > 127) {
666 PyErr_SetString(PyExc_SystemError, "non-ascii grouped digit");
667 return -1;
668 }
Victor Stinnerd3f08822012-05-29 12:57:52 +0200669 PyUnicode_WRITE(kind, data, writer->pos + t, c);
Benjamin Peterson21e0da22012-01-11 21:00:42 -0500670 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200671 }
Victor Stinnerd3f08822012-05-29 12:57:52 +0200672 writer->pos += spec->n_grouped_digits;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200673
674 if (spec->n_decimal) {
Victor Stinnerd3f08822012-05-29 12:57:52 +0200675 _PyUnicode_FastCopyCharacters(
676 writer->buffer, writer->pos,
677 locale->decimal_point, 0, spec->n_decimal);
678 writer->pos += spec->n_decimal;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200679 d_pos += 1;
680 }
681
682 if (spec->n_remainder) {
Victor Stinnerd3f08822012-05-29 12:57:52 +0200683 _PyUnicode_FastCopyCharacters(
684 writer->buffer, writer->pos,
685 digits, d_pos, spec->n_remainder);
686 writer->pos += spec->n_remainder;
Brett Cannon8a250fa2012-06-25 16:13:44 -0400687 /* d_pos += spec->n_remainder; */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200688 }
689
690 if (spec->n_rpadding) {
Victor Stinnerd3f08822012-05-29 12:57:52 +0200691 _PyUnicode_FastFill(writer->buffer,
692 writer->pos, spec->n_rpadding,
693 fill_char);
694 writer->pos += spec->n_rpadding;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200695 }
Victor Stinnerafbaa202011-09-28 21:50:16 +0200696 return 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200697}
698
Serhiy Storchaka2d06e842015-12-25 19:53:18 +0200699static const char no_grouping[1] = {CHAR_MAX};
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200700
701/* Find the decimal point character(s?), thousands_separator(s?), and
702 grouping description, either for the current locale if type is
Eric V. Smith89e1b1a2016-09-09 23:06:47 -0400703 LT_CURRENT_LOCALE, a hard-coded locale if LT_DEFAULT_LOCALE or
704 LT_UNDERSCORE_LOCALE/LT_UNDER_FOUR_LOCALE, or none if LT_NO_LOCALE. */
Victor Stinner41a863c2012-02-24 00:37:51 +0100705static int
Benjamin Peterson59e5e0d2016-09-13 22:43:45 -0700706get_locale_info(enum LocaleType type, LocaleInfo *locale_info)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200707{
708 switch (type) {
709 case LT_CURRENT_LOCALE: {
Victor Stinner02e6bf72018-11-20 16:20:16 +0100710 struct lconv *lc = localeconv();
711 if (_Py_GetLocaleconvNumeric(lc,
712 &locale_info->decimal_point,
713 &locale_info->thousands_sep) < 0) {
Victor Stinner41a863c2012-02-24 00:37:51 +0100714 return -1;
Victor Stinnercb064fc2018-01-15 15:58:02 +0100715 }
Victor Stinner02e6bf72018-11-20 16:20:16 +0100716
717 /* localeconv() grouping can become a dangling pointer or point
718 to a different string if another thread calls localeconv() during
719 the string formatting. Copy the string to avoid this risk. */
720 locale_info->grouping_buffer = _PyMem_Strdup(lc->grouping);
721 if (locale_info->grouping_buffer == NULL) {
722 PyErr_NoMemory();
723 return -1;
724 }
725 locale_info->grouping = locale_info->grouping_buffer;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200726 break;
727 }
728 case LT_DEFAULT_LOCALE:
Eric V. Smith89e1b1a2016-09-09 23:06:47 -0400729 case LT_UNDERSCORE_LOCALE:
730 case LT_UNDER_FOUR_LOCALE:
Victor Stinner41a863c2012-02-24 00:37:51 +0100731 locale_info->decimal_point = PyUnicode_FromOrdinal('.');
Eric V. Smith89e1b1a2016-09-09 23:06:47 -0400732 locale_info->thousands_sep = PyUnicode_FromOrdinal(
733 type == LT_DEFAULT_LOCALE ? ',' : '_');
Benjamin Peterson59e5e0d2016-09-13 22:43:45 -0700734 if (!locale_info->decimal_point || !locale_info->thousands_sep)
Victor Stinner41a863c2012-02-24 00:37:51 +0100735 return -1;
Eric V. Smith89e1b1a2016-09-09 23:06:47 -0400736 if (type != LT_UNDER_FOUR_LOCALE)
737 locale_info->grouping = "\3"; /* Group every 3 characters. The
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200738 (implicit) trailing 0 means repeat
739 infinitely. */
Eric V. Smith89e1b1a2016-09-09 23:06:47 -0400740 else
741 locale_info->grouping = "\4"; /* Bin/oct/hex group every four. */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200742 break;
743 case LT_NO_LOCALE:
Victor Stinner41a863c2012-02-24 00:37:51 +0100744 locale_info->decimal_point = PyUnicode_FromOrdinal('.');
745 locale_info->thousands_sep = PyUnicode_New(0, 0);
Benjamin Peterson59e5e0d2016-09-13 22:43:45 -0700746 if (!locale_info->decimal_point || !locale_info->thousands_sep)
Victor Stinner41a863c2012-02-24 00:37:51 +0100747 return -1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200748 locale_info->grouping = no_grouping;
749 break;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200750 }
Victor Stinner41a863c2012-02-24 00:37:51 +0100751 return 0;
752}
753
754static void
755free_locale_info(LocaleInfo *locale_info)
756{
757 Py_XDECREF(locale_info->decimal_point);
758 Py_XDECREF(locale_info->thousands_sep);
Victor Stinner02e6bf72018-11-20 16:20:16 +0100759 PyMem_Free(locale_info->grouping_buffer);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200760}
761
762/************************************************************************/
763/*********** string formatting ******************************************/
764/************************************************************************/
765
Victor Stinnerd3f08822012-05-29 12:57:52 +0200766static int
767format_string_internal(PyObject *value, const InternalFormatSpec *format,
768 _PyUnicodeWriter *writer)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200769{
770 Py_ssize_t lpad;
771 Py_ssize_t rpad;
772 Py_ssize_t total;
Victor Stinnerd3f08822012-05-29 12:57:52 +0200773 Py_ssize_t len;
774 int result = -1;
Victor Stinnerece58de2012-04-23 23:36:38 +0200775 Py_UCS4 maxchar;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200776
Victor Stinnerd3f08822012-05-29 12:57:52 +0200777 assert(PyUnicode_IS_READY(value));
778 len = PyUnicode_GET_LENGTH(value);
779
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200780 /* sign is not allowed on strings */
781 if (format->sign != '\0') {
782 PyErr_SetString(PyExc_ValueError,
783 "Sign not allowed in string format specifier");
784 goto done;
785 }
786
787 /* alternate is not allowed on strings */
788 if (format->alternate) {
789 PyErr_SetString(PyExc_ValueError,
790 "Alternate form (#) not allowed in string format "
791 "specifier");
792 goto done;
793 }
794
795 /* '=' alignment not allowed on strings */
796 if (format->align == '=') {
797 PyErr_SetString(PyExc_ValueError,
798 "'=' alignment not allowed "
799 "in string format specifier");
800 goto done;
801 }
802
Victor Stinner621ef3d2012-10-02 00:33:47 +0200803 if ((format->width == -1 || format->width <= len)
804 && (format->precision == -1 || format->precision >= len)) {
Victor Stinnerd3f08822012-05-29 12:57:52 +0200805 /* Fast path */
806 return _PyUnicodeWriter_WriteStr(writer, value);
807 }
808
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200809 /* if precision is specified, output no more that format.precision
810 characters */
811 if (format->precision >= 0 && len >= format->precision) {
812 len = format->precision;
813 }
814
815 calc_padding(len, format->width, format->align, &lpad, &rpad, &total);
816
Victor Stinnereb4b5ac2013-04-03 02:02:33 +0200817 maxchar = writer->maxchar;
Victor Stinnera4ac6002012-01-21 15:50:49 +0100818 if (lpad != 0 || rpad != 0)
819 maxchar = Py_MAX(maxchar, format->fill_char);
Victor Stinnereb4b5ac2013-04-03 02:02:33 +0200820 if (PyUnicode_MAX_CHAR_VALUE(value) > maxchar) {
821 Py_UCS4 valmaxchar = _PyUnicode_FindMaxChar(value, 0, len);
822 maxchar = Py_MAX(maxchar, valmaxchar);
823 }
Victor Stinnera4ac6002012-01-21 15:50:49 +0100824
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200825 /* allocate the resulting string */
Victor Stinnerd3f08822012-05-29 12:57:52 +0200826 if (_PyUnicodeWriter_Prepare(writer, total, maxchar) == -1)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200827 goto done;
828
829 /* Write into that space. First the padding. */
Eric V. Smith2ea97122014-04-14 11:55:10 -0400830 result = fill_padding(writer, len, format->fill_char, lpad, rpad);
Victor Stinnerd3f08822012-05-29 12:57:52 +0200831 if (result == -1)
832 goto done;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200833
834 /* Then the source string. */
Victor Stinnerc9d369f2012-06-16 02:22:37 +0200835 if (len) {
836 _PyUnicode_FastCopyCharacters(writer->buffer, writer->pos,
837 value, 0, len);
838 }
Victor Stinnerd3f08822012-05-29 12:57:52 +0200839 writer->pos += (len + rpad);
840 result = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200841
842done:
843 return result;
844}
845
846
847/************************************************************************/
848/*********** long formatting ********************************************/
849/************************************************************************/
850
Victor Stinnerd3f08822012-05-29 12:57:52 +0200851static int
852format_long_internal(PyObject *value, const InternalFormatSpec *format,
853 _PyUnicodeWriter *writer)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200854{
Victor Stinnerd3f08822012-05-29 12:57:52 +0200855 int result = -1;
Amaury Forgeot d'Arccd27df32012-01-23 22:42:19 +0100856 Py_UCS4 maxchar = 127;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200857 PyObject *tmp = NULL;
858 Py_ssize_t inumeric_chars;
859 Py_UCS4 sign_char = '\0';
860 Py_ssize_t n_digits; /* count of digits need from the computed
861 string */
862 Py_ssize_t n_remainder = 0; /* Used only for 'c' formatting, which
863 produces non-digits */
864 Py_ssize_t n_prefix = 0; /* Count of prefix chars, (e.g., '0x') */
865 Py_ssize_t n_total;
Victor Stinnered277852012-02-01 00:22:23 +0100866 Py_ssize_t prefix = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200867 NumberFieldWidths spec;
868 long x;
869
870 /* Locale settings, either from the actual locale or
871 from a hard-code pseudo-locale */
Victor Stinner02e6bf72018-11-20 16:20:16 +0100872 LocaleInfo locale = LocaleInfo_STATIC_INIT;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200873
874 /* no precision allowed on integers */
875 if (format->precision != -1) {
876 PyErr_SetString(PyExc_ValueError,
877 "Precision not allowed in integer format specifier");
878 goto done;
879 }
880
881 /* special case for character formatting */
882 if (format->type == 'c') {
883 /* error to specify a sign */
884 if (format->sign != '\0') {
885 PyErr_SetString(PyExc_ValueError,
886 "Sign not allowed with integer"
887 " format specifier 'c'");
888 goto done;
889 }
Eric V. Smitha12572f2014-04-15 22:37:55 -0400890 /* error to request alternate format */
891 if (format->alternate) {
892 PyErr_SetString(PyExc_ValueError,
893 "Alternate form (#) not allowed with integer"
894 " format specifier 'c'");
895 goto done;
896 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200897
898 /* taken from unicodeobject.c formatchar() */
899 /* Integer input truncated to a character */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200900 x = PyLong_AsLong(value);
901 if (x == -1 && PyErr_Occurred())
902 goto done;
903 if (x < 0 || x > 0x10ffff) {
904 PyErr_SetString(PyExc_OverflowError,
Victor Stinnera4ac6002012-01-21 15:50:49 +0100905 "%c arg not in range(0x110000)");
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200906 goto done;
907 }
908 tmp = PyUnicode_FromOrdinal(x);
909 inumeric_chars = 0;
910 n_digits = 1;
Amaury Forgeot d'Arc6d766fc2012-01-23 23:20:43 +0100911 maxchar = Py_MAX(maxchar, (Py_UCS4)x);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200912
913 /* As a sort-of hack, we tell calc_number_widths that we only
914 have "remainder" characters. calc_number_widths thinks
915 these are characters that don't get formatted, only copied
916 into the output string. We do this for 'c' formatting,
917 because the characters are likely to be non-digits. */
918 n_remainder = 1;
919 }
920 else {
921 int base;
922 int leading_chars_to_skip = 0; /* Number of characters added by
923 PyNumber_ToBase that we want to
924 skip over. */
925
926 /* Compute the base and how many characters will be added by
927 PyNumber_ToBase */
928 switch (format->type) {
929 case 'b':
930 base = 2;
931 leading_chars_to_skip = 2; /* 0b */
932 break;
933 case 'o':
934 base = 8;
935 leading_chars_to_skip = 2; /* 0o */
936 break;
937 case 'x':
938 case 'X':
939 base = 16;
940 leading_chars_to_skip = 2; /* 0x */
941 break;
942 default: /* shouldn't be needed, but stops a compiler warning */
943 case 'd':
944 case 'n':
945 base = 10;
946 break;
947 }
948
Victor Stinnerd3f08822012-05-29 12:57:52 +0200949 if (format->sign != '+' && format->sign != ' '
950 && format->width == -1
951 && format->type != 'X' && format->type != 'n'
952 && !format->thousands_separators
953 && PyLong_CheckExact(value))
954 {
955 /* Fast path */
956 return _PyLong_FormatWriter(writer, value, base, format->alternate);
957 }
958
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200959 /* The number of prefix chars is the same as the leading
960 chars to skip */
961 if (format->alternate)
962 n_prefix = leading_chars_to_skip;
963
964 /* Do the hard part, converting to a string in a given base */
Victor Stinnerd3f08822012-05-29 12:57:52 +0200965 tmp = _PyLong_Format(value, base);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200966 if (tmp == NULL || PyUnicode_READY(tmp) == -1)
967 goto done;
968
969 inumeric_chars = 0;
970 n_digits = PyUnicode_GET_LENGTH(tmp);
971
972 prefix = inumeric_chars;
973
974 /* Is a sign character present in the output? If so, remember it
975 and skip it */
976 if (PyUnicode_READ_CHAR(tmp, inumeric_chars) == '-') {
977 sign_char = '-';
978 ++prefix;
979 ++leading_chars_to_skip;
980 }
981
982 /* Skip over the leading chars (0x, 0b, etc.) */
983 n_digits -= leading_chars_to_skip;
984 inumeric_chars += leading_chars_to_skip;
985 }
986
987 /* Determine the grouping, separator, and decimal point, if any. */
Victor Stinner41a863c2012-02-24 00:37:51 +0100988 if (get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
Eric V. Smith89e1b1a2016-09-09 23:06:47 -0400989 format->thousands_separators,
Victor Stinner41a863c2012-02-24 00:37:51 +0100990 &locale) == -1)
991 goto done;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200992
993 /* Calculate how much memory we'll need. */
994 n_total = calc_number_widths(&spec, n_prefix, sign_char, tmp, inumeric_chars,
Victor Stinner41a863c2012-02-24 00:37:51 +0100995 inumeric_chars + n_digits, n_remainder, 0,
996 &locale, format, &maxchar);
Victor Stinnera4ac6002012-01-21 15:50:49 +0100997
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200998 /* Allocate the memory. */
Victor Stinnerd3f08822012-05-29 12:57:52 +0200999 if (_PyUnicodeWriter_Prepare(writer, n_total, maxchar) == -1)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001000 goto done;
1001
1002 /* Populate the memory. */
Victor Stinnerd3f08822012-05-29 12:57:52 +02001003 result = fill_number(writer, &spec,
1004 tmp, inumeric_chars, inumeric_chars + n_digits,
Eric V. Smith2ea97122014-04-14 11:55:10 -04001005 tmp, prefix, format->fill_char,
Victor Stinnerd3f08822012-05-29 12:57:52 +02001006 &locale, format->type == 'X');
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001007
1008done:
1009 Py_XDECREF(tmp);
Victor Stinner41a863c2012-02-24 00:37:51 +01001010 free_locale_info(&locale);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001011 return result;
1012}
1013
1014/************************************************************************/
1015/*********** float formatting *******************************************/
1016/************************************************************************/
1017
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001018/* much of this is taken from unicodeobject.c */
Victor Stinnerd3f08822012-05-29 12:57:52 +02001019static int
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001020format_float_internal(PyObject *value,
Victor Stinnerd3f08822012-05-29 12:57:52 +02001021 const InternalFormatSpec *format,
1022 _PyUnicodeWriter *writer)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001023{
1024 char *buf = NULL; /* buffer returned from PyOS_double_to_string */
1025 Py_ssize_t n_digits;
1026 Py_ssize_t n_remainder;
1027 Py_ssize_t n_total;
1028 int has_decimal;
1029 double val;
Victor Stinner76d38502013-06-24 23:34:15 +02001030 int precision, default_precision = 6;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001031 Py_UCS4 type = format->type;
1032 int add_pct = 0;
1033 Py_ssize_t index;
1034 NumberFieldWidths spec;
1035 int flags = 0;
Victor Stinnerd3f08822012-05-29 12:57:52 +02001036 int result = -1;
Amaury Forgeot d'Arccd27df32012-01-23 22:42:19 +01001037 Py_UCS4 maxchar = 127;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001038 Py_UCS4 sign_char = '\0';
1039 int float_type; /* Used to see if we have a nan, inf, or regular float. */
1040 PyObject *unicode_tmp = NULL;
1041
1042 /* Locale settings, either from the actual locale or
1043 from a hard-code pseudo-locale */
Victor Stinner02e6bf72018-11-20 16:20:16 +01001044 LocaleInfo locale = LocaleInfo_STATIC_INIT;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001045
Victor Stinner2f084ec2013-06-23 14:54:30 +02001046 if (format->precision > INT_MAX) {
1047 PyErr_SetString(PyExc_ValueError, "precision too big");
1048 goto done;
1049 }
1050 precision = (int)format->precision;
1051
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001052 if (format->alternate)
1053 flags |= Py_DTSF_ALT;
1054
1055 if (type == '\0') {
1056 /* Omitted type specifier. Behaves in the same way as repr(x)
1057 and str(x) if no precision is given, else like 'g', but with
1058 at least one digit after the decimal point. */
1059 flags |= Py_DTSF_ADD_DOT_0;
1060 type = 'r';
1061 default_precision = 0;
1062 }
1063
1064 if (type == 'n')
1065 /* 'n' is the same as 'g', except for the locale used to
1066 format the result. We take care of that later. */
1067 type = 'g';
1068
1069 val = PyFloat_AsDouble(value);
1070 if (val == -1.0 && PyErr_Occurred())
1071 goto done;
1072
1073 if (type == '%') {
1074 type = 'f';
1075 val *= 100;
1076 add_pct = 1;
1077 }
1078
1079 if (precision < 0)
1080 precision = default_precision;
1081 else if (type == 'r')
1082 type = 'g';
1083
Martin Panter4c359642016-05-08 13:53:41 +00001084 /* Cast "type", because if we're in unicode we need to pass an
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001085 8-bit char. This is safe, because we've restricted what "type"
1086 can be. */
1087 buf = PyOS_double_to_string(val, (char)type, precision, flags,
1088 &float_type);
1089 if (buf == NULL)
1090 goto done;
1091 n_digits = strlen(buf);
1092
1093 if (add_pct) {
1094 /* We know that buf has a trailing zero (since we just called
1095 strlen() on it), and we don't use that fact any more. So we
1096 can just write over the trailing zero. */
1097 buf[n_digits] = '%';
1098 n_digits += 1;
1099 }
1100
Victor Stinnerd3f08822012-05-29 12:57:52 +02001101 if (format->sign != '+' && format->sign != ' '
1102 && format->width == -1
1103 && format->type != 'n'
1104 && !format->thousands_separators)
1105 {
1106 /* Fast path */
Victor Stinner4a587072013-11-19 12:54:53 +01001107 result = _PyUnicodeWriter_WriteASCIIString(writer, buf, n_digits);
1108 PyMem_Free(buf);
Victor Stinnerd3f08822012-05-29 12:57:52 +02001109 return result;
1110 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001111
Victor Stinner4a587072013-11-19 12:54:53 +01001112 /* Since there is no unicode version of PyOS_double_to_string,
1113 just use the 8 bit version and then convert to unicode. */
1114 unicode_tmp = _PyUnicode_FromASCII(buf, n_digits);
1115 PyMem_Free(buf);
1116 if (unicode_tmp == NULL)
1117 goto done;
1118
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001119 /* Is a sign character present in the output? If so, remember it
1120 and skip it */
Victor Stinnerd3f08822012-05-29 12:57:52 +02001121 index = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001122 if (PyUnicode_READ_CHAR(unicode_tmp, index) == '-') {
1123 sign_char = '-';
1124 ++index;
1125 --n_digits;
1126 }
1127
1128 /* Determine if we have any "remainder" (after the digits, might include
1129 decimal or exponent or both (or neither)) */
1130 parse_number(unicode_tmp, index, index + n_digits, &n_remainder, &has_decimal);
1131
1132 /* Determine the grouping, separator, and decimal point, if any. */
Victor Stinner41a863c2012-02-24 00:37:51 +01001133 if (get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
Eric V. Smith89e1b1a2016-09-09 23:06:47 -04001134 format->thousands_separators,
Victor Stinner41a863c2012-02-24 00:37:51 +01001135 &locale) == -1)
1136 goto done;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001137
1138 /* Calculate how much memory we'll need. */
Victor Stinnerafbaa202011-09-28 21:50:16 +02001139 n_total = calc_number_widths(&spec, 0, sign_char, unicode_tmp, index,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001140 index + n_digits, n_remainder, has_decimal,
Victor Stinner41a863c2012-02-24 00:37:51 +01001141 &locale, format, &maxchar);
Victor Stinnera4ac6002012-01-21 15:50:49 +01001142
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001143 /* Allocate the memory. */
Victor Stinnerd3f08822012-05-29 12:57:52 +02001144 if (_PyUnicodeWriter_Prepare(writer, n_total, maxchar) == -1)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001145 goto done;
1146
1147 /* Populate the memory. */
Victor Stinnerd3f08822012-05-29 12:57:52 +02001148 result = fill_number(writer, &spec,
1149 unicode_tmp, index, index + n_digits,
Eric V. Smith2ea97122014-04-14 11:55:10 -04001150 NULL, 0, format->fill_char,
Victor Stinnerd3f08822012-05-29 12:57:52 +02001151 &locale, 0);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001152
1153done:
Stefan Krahd9c1bf72012-09-06 13:02:46 +02001154 Py_XDECREF(unicode_tmp);
Victor Stinner41a863c2012-02-24 00:37:51 +01001155 free_locale_info(&locale);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001156 return result;
1157}
1158
1159/************************************************************************/
1160/*********** complex formatting *****************************************/
1161/************************************************************************/
1162
Victor Stinnerd3f08822012-05-29 12:57:52 +02001163static int
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001164format_complex_internal(PyObject *value,
Victor Stinnerd3f08822012-05-29 12:57:52 +02001165 const InternalFormatSpec *format,
1166 _PyUnicodeWriter *writer)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001167{
1168 double re;
1169 double im;
1170 char *re_buf = NULL; /* buffer returned from PyOS_double_to_string */
1171 char *im_buf = NULL; /* buffer returned from PyOS_double_to_string */
1172
1173 InternalFormatSpec tmp_format = *format;
1174 Py_ssize_t n_re_digits;
1175 Py_ssize_t n_im_digits;
1176 Py_ssize_t n_re_remainder;
1177 Py_ssize_t n_im_remainder;
1178 Py_ssize_t n_re_total;
1179 Py_ssize_t n_im_total;
1180 int re_has_decimal;
1181 int im_has_decimal;
Victor Stinner76d38502013-06-24 23:34:15 +02001182 int precision, default_precision = 6;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001183 Py_UCS4 type = format->type;
1184 Py_ssize_t i_re;
1185 Py_ssize_t i_im;
1186 NumberFieldWidths re_spec;
1187 NumberFieldWidths im_spec;
1188 int flags = 0;
Victor Stinnerd3f08822012-05-29 12:57:52 +02001189 int result = -1;
Amaury Forgeot d'Arccd27df32012-01-23 22:42:19 +01001190 Py_UCS4 maxchar = 127;
Victor Stinnerd3f08822012-05-29 12:57:52 +02001191 enum PyUnicode_Kind rkind;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001192 void *rdata;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001193 Py_UCS4 re_sign_char = '\0';
1194 Py_UCS4 im_sign_char = '\0';
1195 int re_float_type; /* Used to see if we have a nan, inf, or regular float. */
1196 int im_float_type;
1197 int add_parens = 0;
1198 int skip_re = 0;
1199 Py_ssize_t lpad;
1200 Py_ssize_t rpad;
1201 Py_ssize_t total;
1202 PyObject *re_unicode_tmp = NULL;
1203 PyObject *im_unicode_tmp = NULL;
1204
1205 /* Locale settings, either from the actual locale or
1206 from a hard-code pseudo-locale */
Victor Stinner02e6bf72018-11-20 16:20:16 +01001207 LocaleInfo locale = LocaleInfo_STATIC_INIT;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001208
Victor Stinner2f084ec2013-06-23 14:54:30 +02001209 if (format->precision > INT_MAX) {
1210 PyErr_SetString(PyExc_ValueError, "precision too big");
1211 goto done;
1212 }
1213 precision = (int)format->precision;
1214
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001215 /* Zero padding is not allowed. */
1216 if (format->fill_char == '0') {
1217 PyErr_SetString(PyExc_ValueError,
1218 "Zero padding is not allowed in complex format "
1219 "specifier");
1220 goto done;
1221 }
1222
1223 /* Neither is '=' alignment . */
1224 if (format->align == '=') {
1225 PyErr_SetString(PyExc_ValueError,
1226 "'=' alignment flag is not allowed in complex format "
1227 "specifier");
1228 goto done;
1229 }
1230
1231 re = PyComplex_RealAsDouble(value);
1232 if (re == -1.0 && PyErr_Occurred())
1233 goto done;
1234 im = PyComplex_ImagAsDouble(value);
1235 if (im == -1.0 && PyErr_Occurred())
1236 goto done;
1237
1238 if (format->alternate)
1239 flags |= Py_DTSF_ALT;
1240
1241 if (type == '\0') {
1242 /* Omitted type specifier. Should be like str(self). */
1243 type = 'r';
1244 default_precision = 0;
1245 if (re == 0.0 && copysign(1.0, re) == 1.0)
1246 skip_re = 1;
1247 else
1248 add_parens = 1;
1249 }
1250
1251 if (type == 'n')
1252 /* 'n' is the same as 'g', except for the locale used to
1253 format the result. We take care of that later. */
1254 type = 'g';
1255
1256 if (precision < 0)
1257 precision = default_precision;
1258 else if (type == 'r')
1259 type = 'g';
1260
Martin Panter4c359642016-05-08 13:53:41 +00001261 /* Cast "type", because if we're in unicode we need to pass an
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001262 8-bit char. This is safe, because we've restricted what "type"
1263 can be. */
1264 re_buf = PyOS_double_to_string(re, (char)type, precision, flags,
1265 &re_float_type);
1266 if (re_buf == NULL)
1267 goto done;
1268 im_buf = PyOS_double_to_string(im, (char)type, precision, flags,
1269 &im_float_type);
1270 if (im_buf == NULL)
1271 goto done;
1272
1273 n_re_digits = strlen(re_buf);
1274 n_im_digits = strlen(im_buf);
1275
1276 /* Since there is no unicode version of PyOS_double_to_string,
1277 just use the 8 bit version and then convert to unicode. */
Victor Stinnerd3f08822012-05-29 12:57:52 +02001278 re_unicode_tmp = _PyUnicode_FromASCII(re_buf, n_re_digits);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001279 if (re_unicode_tmp == NULL)
1280 goto done;
1281 i_re = 0;
1282
Victor Stinnerd3f08822012-05-29 12:57:52 +02001283 im_unicode_tmp = _PyUnicode_FromASCII(im_buf, n_im_digits);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001284 if (im_unicode_tmp == NULL)
1285 goto done;
1286 i_im = 0;
1287
1288 /* Is a sign character present in the output? If so, remember it
1289 and skip it */
1290 if (PyUnicode_READ_CHAR(re_unicode_tmp, i_re) == '-') {
1291 re_sign_char = '-';
1292 ++i_re;
1293 --n_re_digits;
1294 }
1295 if (PyUnicode_READ_CHAR(im_unicode_tmp, i_im) == '-') {
1296 im_sign_char = '-';
1297 ++i_im;
1298 --n_im_digits;
1299 }
1300
1301 /* Determine if we have any "remainder" (after the digits, might include
1302 decimal or exponent or both (or neither)) */
Victor Stinnerafbaa202011-09-28 21:50:16 +02001303 parse_number(re_unicode_tmp, i_re, i_re + n_re_digits,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001304 &n_re_remainder, &re_has_decimal);
Victor Stinnerafbaa202011-09-28 21:50:16 +02001305 parse_number(im_unicode_tmp, i_im, i_im + n_im_digits,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001306 &n_im_remainder, &im_has_decimal);
1307
1308 /* Determine the grouping, separator, and decimal point, if any. */
Victor Stinner41a863c2012-02-24 00:37:51 +01001309 if (get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
Eric V. Smith89e1b1a2016-09-09 23:06:47 -04001310 format->thousands_separators,
Victor Stinner41a863c2012-02-24 00:37:51 +01001311 &locale) == -1)
1312 goto done;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001313
1314 /* Turn off any padding. We'll do it later after we've composed
1315 the numbers without padding. */
1316 tmp_format.fill_char = '\0';
1317 tmp_format.align = '<';
1318 tmp_format.width = -1;
1319
1320 /* Calculate how much memory we'll need. */
1321 n_re_total = calc_number_widths(&re_spec, 0, re_sign_char, re_unicode_tmp,
1322 i_re, i_re + n_re_digits, n_re_remainder,
Victor Stinner41a863c2012-02-24 00:37:51 +01001323 re_has_decimal, &locale, &tmp_format,
1324 &maxchar);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001325
1326 /* Same formatting, but always include a sign, unless the real part is
1327 * going to be omitted, in which case we use whatever sign convention was
1328 * requested by the original format. */
1329 if (!skip_re)
1330 tmp_format.sign = '+';
1331 n_im_total = calc_number_widths(&im_spec, 0, im_sign_char, im_unicode_tmp,
1332 i_im, i_im + n_im_digits, n_im_remainder,
Victor Stinner41a863c2012-02-24 00:37:51 +01001333 im_has_decimal, &locale, &tmp_format,
1334 &maxchar);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001335
1336 if (skip_re)
1337 n_re_total = 0;
1338
1339 /* Add 1 for the 'j', and optionally 2 for parens. */
1340 calc_padding(n_re_total + n_im_total + 1 + add_parens * 2,
1341 format->width, format->align, &lpad, &rpad, &total);
1342
Victor Stinner41a863c2012-02-24 00:37:51 +01001343 if (lpad || rpad)
Victor Stinnera4ac6002012-01-21 15:50:49 +01001344 maxchar = Py_MAX(maxchar, format->fill_char);
1345
Victor Stinnerd3f08822012-05-29 12:57:52 +02001346 if (_PyUnicodeWriter_Prepare(writer, total, maxchar) == -1)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001347 goto done;
Victor Stinnerd3f08822012-05-29 12:57:52 +02001348 rkind = writer->kind;
1349 rdata = writer->data;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001350
1351 /* Populate the memory. First, the padding. */
Victor Stinnerd3f08822012-05-29 12:57:52 +02001352 result = fill_padding(writer,
1353 n_re_total + n_im_total + 1 + add_parens * 2,
Eric V. Smith2ea97122014-04-14 11:55:10 -04001354 format->fill_char, lpad, rpad);
Victor Stinnerd3f08822012-05-29 12:57:52 +02001355 if (result == -1)
1356 goto done;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001357
Victor Stinnerd3f08822012-05-29 12:57:52 +02001358 if (add_parens) {
1359 PyUnicode_WRITE(rkind, rdata, writer->pos, '(');
1360 writer->pos++;
1361 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001362
1363 if (!skip_re) {
Victor Stinnerd3f08822012-05-29 12:57:52 +02001364 result = fill_number(writer, &re_spec,
1365 re_unicode_tmp, i_re, i_re + n_re_digits,
1366 NULL, 0,
1367 0,
1368 &locale, 0);
1369 if (result == -1)
Victor Stinnerafbaa202011-09-28 21:50:16 +02001370 goto done;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001371 }
Victor Stinnerd3f08822012-05-29 12:57:52 +02001372 result = fill_number(writer, &im_spec,
1373 im_unicode_tmp, i_im, i_im + n_im_digits,
1374 NULL, 0,
1375 0,
1376 &locale, 0);
1377 if (result == -1)
Victor Stinnerafbaa202011-09-28 21:50:16 +02001378 goto done;
Victor Stinnerd3f08822012-05-29 12:57:52 +02001379 PyUnicode_WRITE(rkind, rdata, writer->pos, 'j');
1380 writer->pos++;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001381
Victor Stinnerd3f08822012-05-29 12:57:52 +02001382 if (add_parens) {
1383 PyUnicode_WRITE(rkind, rdata, writer->pos, ')');
1384 writer->pos++;
1385 }
1386
1387 writer->pos += rpad;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001388
1389done:
1390 PyMem_Free(re_buf);
1391 PyMem_Free(im_buf);
1392 Py_XDECREF(re_unicode_tmp);
1393 Py_XDECREF(im_unicode_tmp);
Victor Stinner41a863c2012-02-24 00:37:51 +01001394 free_locale_info(&locale);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001395 return result;
1396}
1397
1398/************************************************************************/
1399/*********** built in formatters ****************************************/
1400/************************************************************************/
doko@ubuntu.com39378f72012-06-21 12:12:20 +02001401static int
Victor Stinnerd3f08822012-05-29 12:57:52 +02001402format_obj(PyObject *obj, _PyUnicodeWriter *writer)
1403{
1404 PyObject *str;
1405 int err;
1406
1407 str = PyObject_Str(obj);
1408 if (str == NULL)
1409 return -1;
1410 err = _PyUnicodeWriter_WriteStr(writer, str);
1411 Py_DECREF(str);
1412 return err;
1413}
1414
1415int
1416_PyUnicode_FormatAdvancedWriter(_PyUnicodeWriter *writer,
1417 PyObject *obj,
1418 PyObject *format_spec,
1419 Py_ssize_t start, Py_ssize_t end)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001420{
1421 InternalFormatSpec format;
Victor Stinnerd3f08822012-05-29 12:57:52 +02001422
1423 assert(PyUnicode_Check(obj));
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001424
1425 /* check for the special case of zero length format spec, make
1426 it equivalent to str(obj) */
Victor Stinnerd3f08822012-05-29 12:57:52 +02001427 if (start == end) {
1428 if (PyUnicode_CheckExact(obj))
1429 return _PyUnicodeWriter_WriteStr(writer, obj);
1430 else
1431 return format_obj(obj, writer);
1432 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001433
1434 /* parse the format_spec */
1435 if (!parse_internal_render_format_spec(format_spec, start, end,
1436 &format, 's', '<'))
Victor Stinnerd3f08822012-05-29 12:57:52 +02001437 return -1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001438
1439 /* type conversion? */
1440 switch (format.type) {
1441 case 's':
1442 /* no type conversion needed, already a string. do the formatting */
Victor Stinnerd3f08822012-05-29 12:57:52 +02001443 return format_string_internal(obj, &format, writer);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001444 default:
1445 /* unknown */
1446 unknown_presentation_type(format.type, obj->ob_type->tp_name);
Victor Stinnerd3f08822012-05-29 12:57:52 +02001447 return -1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001448 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001449}
1450
Victor Stinnerd3f08822012-05-29 12:57:52 +02001451int
1452_PyLong_FormatAdvancedWriter(_PyUnicodeWriter *writer,
1453 PyObject *obj,
1454 PyObject *format_spec,
1455 Py_ssize_t start, Py_ssize_t end)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001456{
Victor Stinnerd3f08822012-05-29 12:57:52 +02001457 PyObject *tmp = NULL, *str = NULL;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001458 InternalFormatSpec format;
Victor Stinnerd3f08822012-05-29 12:57:52 +02001459 int result = -1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001460
1461 /* check for the special case of zero length format spec, make
1462 it equivalent to str(obj) */
1463 if (start == end) {
Victor Stinnerd3f08822012-05-29 12:57:52 +02001464 if (PyLong_CheckExact(obj))
1465 return _PyLong_FormatWriter(writer, obj, 10, 0);
1466 else
1467 return format_obj(obj, writer);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001468 }
1469
1470 /* parse the format_spec */
1471 if (!parse_internal_render_format_spec(format_spec, start, end,
1472 &format, 'd', '>'))
1473 goto done;
1474
1475 /* type conversion? */
1476 switch (format.type) {
1477 case 'b':
1478 case 'c':
1479 case 'd':
1480 case 'o':
1481 case 'x':
1482 case 'X':
1483 case 'n':
Serhiy Storchaka95949422013-08-27 19:40:23 +03001484 /* no type conversion needed, already an int. do the formatting */
Victor Stinnerd3f08822012-05-29 12:57:52 +02001485 result = format_long_internal(obj, &format, writer);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001486 break;
1487
1488 case 'e':
1489 case 'E':
1490 case 'f':
1491 case 'F':
1492 case 'g':
1493 case 'G':
1494 case '%':
1495 /* convert to float */
1496 tmp = PyNumber_Float(obj);
1497 if (tmp == NULL)
1498 goto done;
Victor Stinnerd3f08822012-05-29 12:57:52 +02001499 result = format_float_internal(tmp, &format, writer);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001500 break;
1501
1502 default:
1503 /* unknown */
1504 unknown_presentation_type(format.type, obj->ob_type->tp_name);
1505 goto done;
1506 }
1507
1508done:
1509 Py_XDECREF(tmp);
Victor Stinnerd3f08822012-05-29 12:57:52 +02001510 Py_XDECREF(str);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001511 return result;
1512}
1513
Victor Stinnerd3f08822012-05-29 12:57:52 +02001514int
1515_PyFloat_FormatAdvancedWriter(_PyUnicodeWriter *writer,
1516 PyObject *obj,
1517 PyObject *format_spec,
1518 Py_ssize_t start, Py_ssize_t end)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001519{
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001520 InternalFormatSpec format;
1521
1522 /* check for the special case of zero length format spec, make
1523 it equivalent to str(obj) */
Victor Stinnerd3f08822012-05-29 12:57:52 +02001524 if (start == end)
1525 return format_obj(obj, writer);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001526
1527 /* parse the format_spec */
1528 if (!parse_internal_render_format_spec(format_spec, start, end,
1529 &format, '\0', '>'))
Victor Stinnerd3f08822012-05-29 12:57:52 +02001530 return -1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001531
1532 /* type conversion? */
1533 switch (format.type) {
1534 case '\0': /* No format code: like 'g', but with at least one decimal. */
1535 case 'e':
1536 case 'E':
1537 case 'f':
1538 case 'F':
1539 case 'g':
1540 case 'G':
1541 case 'n':
1542 case '%':
1543 /* no conversion, already a float. do the formatting */
Victor Stinnerd3f08822012-05-29 12:57:52 +02001544 return format_float_internal(obj, &format, writer);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001545
1546 default:
1547 /* unknown */
1548 unknown_presentation_type(format.type, obj->ob_type->tp_name);
Victor Stinnerd3f08822012-05-29 12:57:52 +02001549 return -1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001550 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001551}
1552
Victor Stinnerd3f08822012-05-29 12:57:52 +02001553int
1554_PyComplex_FormatAdvancedWriter(_PyUnicodeWriter *writer,
1555 PyObject *obj,
1556 PyObject *format_spec,
1557 Py_ssize_t start, Py_ssize_t end)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001558{
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001559 InternalFormatSpec format;
1560
1561 /* check for the special case of zero length format spec, make
1562 it equivalent to str(obj) */
Victor Stinnerd3f08822012-05-29 12:57:52 +02001563 if (start == end)
1564 return format_obj(obj, writer);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001565
1566 /* parse the format_spec */
1567 if (!parse_internal_render_format_spec(format_spec, start, end,
1568 &format, '\0', '>'))
Victor Stinnerd3f08822012-05-29 12:57:52 +02001569 return -1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001570
1571 /* type conversion? */
1572 switch (format.type) {
1573 case '\0': /* No format code: like 'g', but with at least one decimal. */
1574 case 'e':
1575 case 'E':
1576 case 'f':
1577 case 'F':
1578 case 'g':
1579 case 'G':
1580 case 'n':
1581 /* no conversion, already a complex. do the formatting */
Victor Stinnerd3f08822012-05-29 12:57:52 +02001582 return format_complex_internal(obj, &format, writer);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001583
1584 default:
1585 /* unknown */
1586 unknown_presentation_type(format.type, obj->ob_type->tp_name);
Victor Stinnerd3f08822012-05-29 12:57:52 +02001587 return -1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001588 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001589}