blob: 2cd3eb8eb75098c939004ddba8c750e242a19d18 [file] [log] [blame]
/* implements the unicode (as opposed to string) version of the
   built-in formatters for string, int, float.  that is, the versions
   of int.__format__, etc., that take and return unicode objects */
4
5#include "Python.h"
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02006#include <locale.h>
7
8/* Raises an exception about an unknown presentation type for this
9 * type. */
10
11static void
12unknown_presentation_type(Py_UCS4 presentation_type,
13 const char* type_name)
14{
15 /* %c might be out-of-range, hence the two cases. */
16 if (presentation_type > 32 && presentation_type < 128)
17 PyErr_Format(PyExc_ValueError,
18 "Unknown format code '%c' "
19 "for object of type '%.200s'",
20 (char)presentation_type,
21 type_name);
22 else
23 PyErr_Format(PyExc_ValueError,
24 "Unknown format code '\\x%x' "
25 "for object of type '%.200s'",
26 (unsigned int)presentation_type,
27 type_name);
28}
29
30static void
Miss Islington (bot)cd4dd932018-10-01 22:12:02 -070031invalid_thousands_separator_type(char specifier, Py_UCS4 presentation_type)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020032{
Miss Islington (bot)cd4dd932018-10-01 22:12:02 -070033 assert(specifier == ',' || specifier == '_');
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020034 if (presentation_type > 32 && presentation_type < 128)
35 PyErr_Format(PyExc_ValueError,
Miss Islington (bot)cd4dd932018-10-01 22:12:02 -070036 "Cannot specify '%c' with '%c'.",
37 specifier, (char)presentation_type);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020038 else
39 PyErr_Format(PyExc_ValueError,
Miss Islington (bot)cd4dd932018-10-01 22:12:02 -070040 "Cannot specify '%c' with '\\x%x'.",
41 specifier, (unsigned int)presentation_type);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020042}
43
Eric V. Smith89e1b1a2016-09-09 23:06:47 -040044static void
Benjamin Petersoneb0dfa92016-09-09 20:14:05 -070045invalid_comma_and_underscore(void)
Eric V. Smith89e1b1a2016-09-09 23:06:47 -040046{
47 PyErr_Format(PyExc_ValueError, "Cannot specify both ',' and '_'.");
48}
49
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020050/*
51 get_integer consumes 0 or more decimal digit characters from an
52 input string, updates *result with the corresponding positive
53 integer, and returns the number of digits consumed.
54
55 returns -1 on error.
56*/
57static int
Serhiy Storchaka1f932612016-08-29 15:57:26 +030058get_integer(PyObject *str, Py_ssize_t *ppos, Py_ssize_t end,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020059 Py_ssize_t *result)
60{
Serhiy Storchaka1f932612016-08-29 15:57:26 +030061 Py_ssize_t accumulator, digitval, pos = *ppos;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020062 int numdigits;
Serhiy Storchaka1f932612016-08-29 15:57:26 +030063 int kind = PyUnicode_KIND(str);
64 void *data = PyUnicode_DATA(str);
65
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020066 accumulator = numdigits = 0;
Serhiy Storchaka1f932612016-08-29 15:57:26 +030067 for (; pos < end; pos++, numdigits++) {
68 digitval = Py_UNICODE_TODECIMAL(PyUnicode_READ(kind, data, pos));
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020069 if (digitval < 0)
70 break;
71 /*
Mark Dickinson47862d42011-12-01 15:27:04 +000072 Detect possible overflow before it happens:
73
74 accumulator * 10 + digitval > PY_SSIZE_T_MAX if and only if
75 accumulator > (PY_SSIZE_T_MAX - digitval) / 10.
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020076 */
Mark Dickinson47862d42011-12-01 15:27:04 +000077 if (accumulator > (PY_SSIZE_T_MAX - digitval) / 10) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020078 PyErr_Format(PyExc_ValueError,
79 "Too many decimal digits in format string");
Serhiy Storchaka1f932612016-08-29 15:57:26 +030080 *ppos = pos;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020081 return -1;
82 }
Mark Dickinson47862d42011-12-01 15:27:04 +000083 accumulator = accumulator * 10 + digitval;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020084 }
Serhiy Storchaka1f932612016-08-29 15:57:26 +030085 *ppos = pos;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020086 *result = accumulator;
87 return numdigits;
88}
89
90/************************************************************************/
91/*********** standard format specifier parsing **************************/
92/************************************************************************/
93
94/* returns true if this character is a specifier alignment token */
95Py_LOCAL_INLINE(int)
96is_alignment_token(Py_UCS4 c)
97{
98 switch (c) {
99 case '<': case '>': case '=': case '^':
100 return 1;
101 default:
102 return 0;
103 }
104}
105
106/* returns true if this character is a sign element */
107Py_LOCAL_INLINE(int)
108is_sign_element(Py_UCS4 c)
109{
110 switch (c) {
111 case ' ': case '+': case '-':
112 return 1;
113 default:
114 return 0;
115 }
116}
Eric Smith8c663262007-08-25 02:26:07 +0000117
/* Locale type codes. LT_NO_LOCALE must be zero, because the format spec
   parser tests the thousands_separators field for truth. */
enum LocaleType {
    /* No thousands separator was requested. */
    LT_NO_LOCALE = 0,

    /* ',' separator.  The value is the separator character itself, which
       lets the parser pass it on when reporting an invalid combination. */
    LT_DEFAULT_LOCALE = ',',

    /* '_' separator; the value is again the separator character. */
    LT_UNDERSCORE_LOCALE = '_',

    /* '_' separator used with the b/o/x/X types: groups of four digits. */
    LT_UNDER_FOUR_LOCALE,

    /* Take separator and grouping from the current C locale. */
    LT_CURRENT_LOCALE
};
Eric Smith4a7d76d2008-05-30 18:10:19 +0000126
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200127typedef struct {
128 Py_UCS4 fill_char;
129 Py_UCS4 align;
130 int alternate;
131 Py_UCS4 sign;
132 Py_ssize_t width;
Benjamin Peterson995026a2016-09-13 22:46:15 -0700133 enum LocaleType thousands_separators;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200134 Py_ssize_t precision;
135 Py_UCS4 type;
136} InternalFormatSpec;
Eric Smith4a7d76d2008-05-30 18:10:19 +0000137
#if 0
/* Occasionally useful for debugging.  Should normally be compiled out.

   Dumps every field of *format to stdout.  The Py_UCS4 fields are cast
   to unsigned int and printed as numeric code points, so the printf
   conversions match their argument types and values above 127 print
   meaningfully. */
static void
DEBUG_PRINT_FORMAT_SPEC(InternalFormatSpec *format)
{
    printf("internal format spec: fill_char %u\n",
           (unsigned int)format->fill_char);
    printf("internal format spec: align %u\n",
           (unsigned int)format->align);
    printf("internal format spec: alternate %d\n", format->alternate);
    printf("internal format spec: sign %u\n",
           (unsigned int)format->sign);
    printf("internal format spec: width %zd\n", format->width);
    printf("internal format spec: thousands_separators %d\n",
           (int)format->thousands_separators);
    printf("internal format spec: precision %zd\n", format->precision);
    printf("internal format spec: type %u\n",
           (unsigned int)format->type);
    printf("\n");
}
#endif
155
156
157/*
158 ptr points to the start of the format_spec, end points just past its end.
159 fills in format with the parsed information.
160 returns 1 on success, 0 on failure.
161 if failure, sets the exception
162*/
163static int
164parse_internal_render_format_spec(PyObject *format_spec,
165 Py_ssize_t start, Py_ssize_t end,
166 InternalFormatSpec *format,
167 char default_type,
168 char default_align)
169{
170 Py_ssize_t pos = start;
Serhiy Storchaka1f932612016-08-29 15:57:26 +0300171 int kind = PyUnicode_KIND(format_spec);
172 void *data = PyUnicode_DATA(format_spec);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200173 /* end-pos is used throughout this code to specify the length of
174 the input string */
Serhiy Storchaka1f932612016-08-29 15:57:26 +0300175#define READ_spec(index) PyUnicode_READ(kind, data, index)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200176
177 Py_ssize_t consumed;
178 int align_specified = 0;
Eric V. Smith2ea97122014-04-14 11:55:10 -0400179 int fill_char_specified = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200180
Eric V. Smith2ea97122014-04-14 11:55:10 -0400181 format->fill_char = ' ';
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200182 format->align = default_align;
183 format->alternate = 0;
184 format->sign = '\0';
185 format->width = -1;
Benjamin Peterson995026a2016-09-13 22:46:15 -0700186 format->thousands_separators = LT_NO_LOCALE;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200187 format->precision = -1;
188 format->type = default_type;
189
190 /* If the second char is an alignment token,
191 then parse the fill char */
192 if (end-pos >= 2 && is_alignment_token(READ_spec(pos+1))) {
193 format->align = READ_spec(pos+1);
194 format->fill_char = READ_spec(pos);
Eric V. Smith2ea97122014-04-14 11:55:10 -0400195 fill_char_specified = 1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200196 align_specified = 1;
197 pos += 2;
198 }
199 else if (end-pos >= 1 && is_alignment_token(READ_spec(pos))) {
200 format->align = READ_spec(pos);
201 align_specified = 1;
202 ++pos;
203 }
204
205 /* Parse the various sign options */
206 if (end-pos >= 1 && is_sign_element(READ_spec(pos))) {
207 format->sign = READ_spec(pos);
208 ++pos;
209 }
210
211 /* If the next character is #, we're in alternate mode. This only
212 applies to integers. */
213 if (end-pos >= 1 && READ_spec(pos) == '#') {
214 format->alternate = 1;
215 ++pos;
216 }
217
218 /* The special case for 0-padding (backwards compat) */
Eric V. Smith2ea97122014-04-14 11:55:10 -0400219 if (!fill_char_specified && end-pos >= 1 && READ_spec(pos) == '0') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200220 format->fill_char = '0';
221 if (!align_specified) {
222 format->align = '=';
223 }
224 ++pos;
225 }
226
227 consumed = get_integer(format_spec, &pos, end, &format->width);
228 if (consumed == -1)
229 /* Overflow error. Exception already set. */
230 return 0;
231
232 /* If consumed is 0, we didn't consume any characters for the
233 width. In that case, reset the width to -1, because
234 get_integer() will have set it to zero. -1 is how we record
235 that the width wasn't specified. */
236 if (consumed == 0)
237 format->width = -1;
238
239 /* Comma signifies add thousands separators */
240 if (end-pos && READ_spec(pos) == ',') {
Eric V. Smith89e1b1a2016-09-09 23:06:47 -0400241 format->thousands_separators = LT_DEFAULT_LOCALE;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200242 ++pos;
243 }
Eric V. Smith89e1b1a2016-09-09 23:06:47 -0400244 /* Underscore signifies add thousands separators */
245 if (end-pos && READ_spec(pos) == '_') {
Benjamin Peterson995026a2016-09-13 22:46:15 -0700246 if (format->thousands_separators != LT_NO_LOCALE) {
Eric V. Smith89e1b1a2016-09-09 23:06:47 -0400247 invalid_comma_and_underscore();
248 return 0;
249 }
250 format->thousands_separators = LT_UNDERSCORE_LOCALE;
251 ++pos;
252 }
253 if (end-pos && READ_spec(pos) == ',') {
254 invalid_comma_and_underscore();
255 return 0;
256 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200257
258 /* Parse field precision */
259 if (end-pos && READ_spec(pos) == '.') {
260 ++pos;
261
262 consumed = get_integer(format_spec, &pos, end, &format->precision);
263 if (consumed == -1)
264 /* Overflow error. Exception already set. */
265 return 0;
266
267 /* Not having a precision after a dot is an error. */
268 if (consumed == 0) {
269 PyErr_Format(PyExc_ValueError,
270 "Format specifier missing precision");
271 return 0;
272 }
273
274 }
275
276 /* Finally, parse the type field. */
277
278 if (end-pos > 1) {
Eric V. Smithd25cfe62012-01-19 20:04:28 -0500279 /* More than one char remain, invalid format specifier. */
280 PyErr_Format(PyExc_ValueError, "Invalid format specifier");
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200281 return 0;
282 }
283
284 if (end-pos == 1) {
285 format->type = READ_spec(pos);
286 ++pos;
287 }
288
289 /* Do as much validating as we can, just by looking at the format
290 specifier. Do not take into account what type of formatting
291 we're doing (int, float, string). */
292
293 if (format->thousands_separators) {
294 switch (format->type) {
295 case 'd':
296 case 'e':
297 case 'f':
298 case 'g':
299 case 'E':
300 case 'G':
301 case '%':
302 case 'F':
303 case '\0':
304 /* These are allowed. See PEP 378.*/
305 break;
Eric V. Smith89e1b1a2016-09-09 23:06:47 -0400306 case 'b':
307 case 'o':
308 case 'x':
309 case 'X':
310 /* Underscores are allowed in bin/oct/hex. See PEP 515. */
311 if (format->thousands_separators == LT_UNDERSCORE_LOCALE) {
312 /* Every four digits, not every three, in bin/oct/hex. */
313 format->thousands_separators = LT_UNDER_FOUR_LOCALE;
314 break;
315 }
Stefan Krahf432a322017-08-21 13:09:59 +0200316 /* fall through */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200317 default:
Miss Islington (bot)cd4dd932018-10-01 22:12:02 -0700318 invalid_thousands_separator_type(format->thousands_separators, format->type);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200319 return 0;
320 }
321 }
322
Victor Stinnera4ac6002012-01-21 15:50:49 +0100323 assert (format->align <= 127);
324 assert (format->sign <= 127);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200325 return 1;
326}
327
328/* Calculate the padding needed. */
329static void
330calc_padding(Py_ssize_t nchars, Py_ssize_t width, Py_UCS4 align,
331 Py_ssize_t *n_lpadding, Py_ssize_t *n_rpadding,
332 Py_ssize_t *n_total)
333{
334 if (width >= 0) {
335 if (nchars > width)
336 *n_total = nchars;
337 else
338 *n_total = width;
339 }
340 else {
341 /* not specified, use all of the chars and no more */
342 *n_total = nchars;
343 }
344
345 /* Figure out how much leading space we need, based on the
346 aligning */
347 if (align == '>')
348 *n_lpadding = *n_total - nchars;
349 else if (align == '^')
350 *n_lpadding = (*n_total - nchars) / 2;
351 else if (align == '<' || align == '=')
352 *n_lpadding = 0;
353 else {
354 /* We should never have an unspecified alignment. */
Barry Warsawb2e57942017-09-14 18:13:16 -0700355 Py_UNREACHABLE();
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200356 }
357
358 *n_rpadding = *n_total - nchars - *n_lpadding;
359}
360
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200361/* Do the padding, and return a pointer to where the caller-supplied
362 content goes. */
Victor Stinner9ce59bb2013-05-17 00:04:56 +0200363static int
Victor Stinnerd3f08822012-05-29 12:57:52 +0200364fill_padding(_PyUnicodeWriter *writer,
365 Py_ssize_t nchars,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200366 Py_UCS4 fill_char, Py_ssize_t n_lpadding,
367 Py_ssize_t n_rpadding)
368{
Victor Stinnerd3f08822012-05-29 12:57:52 +0200369 Py_ssize_t pos;
370
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200371 /* Pad on left. */
Victor Stinnerd3f08822012-05-29 12:57:52 +0200372 if (n_lpadding) {
373 pos = writer->pos;
374 _PyUnicode_FastFill(writer->buffer, pos, n_lpadding, fill_char);
375 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200376
377 /* Pad on right. */
Victor Stinnerd3f08822012-05-29 12:57:52 +0200378 if (n_rpadding) {
379 pos = writer->pos + nchars + n_lpadding;
380 _PyUnicode_FastFill(writer->buffer, pos, n_rpadding, fill_char);
381 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200382
383 /* Pointer to the user content. */
Victor Stinnerd3f08822012-05-29 12:57:52 +0200384 writer->pos += n_lpadding;
385 return 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200386}
387
388/************************************************************************/
389/*********** common routines for numeric formatting *********************/
390/************************************************************************/
391
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200392/* Locale info needed for formatting integers and the part of floats
393 before and including the decimal. Note that locales only support
394 8-bit chars, not unicode. */
395typedef struct {
Victor Stinner41a863c2012-02-24 00:37:51 +0100396 PyObject *decimal_point;
397 PyObject *thousands_sep;
398 const char *grouping;
Victor Stinner6eff6b82018-11-20 22:06:21 +0100399 char *grouping_buffer;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200400} LocaleInfo;
401
Victor Stinner6eff6b82018-11-20 22:06:21 +0100402#define STATIC_LOCALE_INFO_INIT {0, 0, 0, 0}
Victor Stinner41a863c2012-02-24 00:37:51 +0100403
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200404/* describes the layout for an integer, see the comment in
405 calc_number_widths() for details */
406typedef struct {
407 Py_ssize_t n_lpadding;
408 Py_ssize_t n_prefix;
409 Py_ssize_t n_spadding;
410 Py_ssize_t n_rpadding;
411 char sign;
412 Py_ssize_t n_sign; /* number of digits needed for sign (0/1) */
413 Py_ssize_t n_grouped_digits; /* Space taken up by the digits, including
414 any grouping chars. */
415 Py_ssize_t n_decimal; /* 0 if only an integer */
416 Py_ssize_t n_remainder; /* Digits in decimal and/or exponent part,
417 excluding the decimal itself, if
418 present. */
419
420 /* These 2 are not the widths of fields, but are needed by
421 STRINGLIB_GROUPING. */
422 Py_ssize_t n_digits; /* The number of digits before a decimal
423 or exponent. */
424 Py_ssize_t n_min_width; /* The min_width we used when we computed
425 the n_grouped_digits width. */
426} NumberFieldWidths;
427
428
429/* Given a number of the form:
430 digits[remainder]
431 where ptr points to the start and end points to the end, find where
432 the integer part ends. This could be a decimal, an exponent, both,
433 or neither.
434 If a decimal point is present, set *has_decimal and increment
435 remainder beyond it.
436 Results are undefined (but shouldn't crash) for improperly
437 formatted strings.
438*/
439static void
440parse_number(PyObject *s, Py_ssize_t pos, Py_ssize_t end,
441 Py_ssize_t *n_remainder, int *has_decimal)
442{
443 Py_ssize_t remainder;
Serhiy Storchaka1f932612016-08-29 15:57:26 +0300444 int kind = PyUnicode_KIND(s);
445 void *data = PyUnicode_DATA(s);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200446
Serhiy Storchaka1f932612016-08-29 15:57:26 +0300447 while (pos<end && Py_ISDIGIT(PyUnicode_READ(kind, data, pos)))
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200448 ++pos;
449 remainder = pos;
450
451 /* Does remainder start with a decimal point? */
Serhiy Storchaka1f932612016-08-29 15:57:26 +0300452 *has_decimal = pos<end && PyUnicode_READ(kind, data, remainder) == '.';
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200453
454 /* Skip the decimal point. */
455 if (*has_decimal)
456 remainder++;
457
458 *n_remainder = end - remainder;
459}
460
461/* not all fields of format are used. for example, precision is
462 unused. should this take discrete params in order to be more clear
463 about what it does? or is passing a single format parameter easier
464 and more efficient enough to justify a little obfuscation? */
465static Py_ssize_t
466calc_number_widths(NumberFieldWidths *spec, Py_ssize_t n_prefix,
467 Py_UCS4 sign_char, PyObject *number, Py_ssize_t n_start,
468 Py_ssize_t n_end, Py_ssize_t n_remainder,
469 int has_decimal, const LocaleInfo *locale,
Victor Stinner41a863c2012-02-24 00:37:51 +0100470 const InternalFormatSpec *format, Py_UCS4 *maxchar)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200471{
472 Py_ssize_t n_non_digit_non_padding;
473 Py_ssize_t n_padding;
474
475 spec->n_digits = n_end - n_start - n_remainder - (has_decimal?1:0);
476 spec->n_lpadding = 0;
477 spec->n_prefix = n_prefix;
Victor Stinner41a863c2012-02-24 00:37:51 +0100478 spec->n_decimal = has_decimal ? PyUnicode_GET_LENGTH(locale->decimal_point) : 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200479 spec->n_remainder = n_remainder;
480 spec->n_spadding = 0;
481 spec->n_rpadding = 0;
482 spec->sign = '\0';
483 spec->n_sign = 0;
484
485 /* the output will look like:
486 | |
487 | <lpadding> <sign> <prefix> <spadding> <grouped_digits> <decimal> <remainder> <rpadding> |
488 | |
489
490 sign is computed from format->sign and the actual
491 sign of the number
492
493 prefix is given (it's for the '0x' prefix)
494
495 digits is already known
496
497 the total width is either given, or computed from the
498 actual digits
499
500 only one of lpadding, spadding, and rpadding can be non-zero,
501 and it's calculated from the width and other fields
502 */
503
504 /* compute the various parts we're going to write */
505 switch (format->sign) {
506 case '+':
507 /* always put a + or - */
508 spec->n_sign = 1;
509 spec->sign = (sign_char == '-' ? '-' : '+');
510 break;
511 case ' ':
512 spec->n_sign = 1;
513 spec->sign = (sign_char == '-' ? '-' : ' ');
514 break;
515 default:
516 /* Not specified, or the default (-) */
517 if (sign_char == '-') {
518 spec->n_sign = 1;
519 spec->sign = '-';
520 }
521 }
522
523 /* The number of chars used for non-digits and non-padding. */
524 n_non_digit_non_padding = spec->n_sign + spec->n_prefix + spec->n_decimal +
525 spec->n_remainder;
526
527 /* min_width can go negative, that's okay. format->width == -1 means
528 we don't care. */
529 if (format->fill_char == '0' && format->align == '=')
530 spec->n_min_width = format->width - n_non_digit_non_padding;
531 else
532 spec->n_min_width = 0;
533
534 if (spec->n_digits == 0)
535 /* This case only occurs when using 'c' formatting, we need
536 to special case it because the grouping code always wants
537 to have at least one character. */
538 spec->n_grouped_digits = 0;
Victor Stinner41a863c2012-02-24 00:37:51 +0100539 else {
540 Py_UCS4 grouping_maxchar;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200541 spec->n_grouped_digits = _PyUnicode_InsertThousandsGrouping(
Victor Stinner41a863c2012-02-24 00:37:51 +0100542 NULL, 0,
543 0, NULL,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200544 spec->n_digits, spec->n_min_width,
Victor Stinner41a863c2012-02-24 00:37:51 +0100545 locale->grouping, locale->thousands_sep, &grouping_maxchar);
546 *maxchar = Py_MAX(*maxchar, grouping_maxchar);
547 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200548
549 /* Given the desired width and the total of digit and non-digit
550 space we consume, see if we need any padding. format->width can
551 be negative (meaning no padding), but this code still works in
552 that case. */
553 n_padding = format->width -
554 (n_non_digit_non_padding + spec->n_grouped_digits);
555 if (n_padding > 0) {
556 /* Some padding is needed. Determine if it's left, space, or right. */
557 switch (format->align) {
558 case '<':
559 spec->n_rpadding = n_padding;
560 break;
561 case '^':
562 spec->n_lpadding = n_padding / 2;
563 spec->n_rpadding = n_padding - spec->n_lpadding;
564 break;
565 case '=':
566 spec->n_spadding = n_padding;
567 break;
568 case '>':
569 spec->n_lpadding = n_padding;
570 break;
571 default:
572 /* Shouldn't get here, but treat it as '>' */
Barry Warsawb2e57942017-09-14 18:13:16 -0700573 Py_UNREACHABLE();
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200574 }
575 }
Victor Stinner41a863c2012-02-24 00:37:51 +0100576
577 if (spec->n_lpadding || spec->n_spadding || spec->n_rpadding)
578 *maxchar = Py_MAX(*maxchar, format->fill_char);
579
Victor Stinner90f50d42012-02-24 01:44:47 +0100580 if (spec->n_decimal)
581 *maxchar = Py_MAX(*maxchar, PyUnicode_MAX_CHAR_VALUE(locale->decimal_point));
582
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200583 return spec->n_lpadding + spec->n_sign + spec->n_prefix +
584 spec->n_spadding + spec->n_grouped_digits + spec->n_decimal +
585 spec->n_remainder + spec->n_rpadding;
586}
587
588/* Fill in the digit parts of a numbers's string representation,
589 as determined in calc_number_widths().
Victor Stinnerafbaa202011-09-28 21:50:16 +0200590 Return -1 on error, or 0 on success. */
591static int
Victor Stinnerd3f08822012-05-29 12:57:52 +0200592fill_number(_PyUnicodeWriter *writer, const NumberFieldWidths *spec,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200593 PyObject *digits, Py_ssize_t d_start, Py_ssize_t d_end,
Victor Stinnerafbaa202011-09-28 21:50:16 +0200594 PyObject *prefix, Py_ssize_t p_start,
595 Py_UCS4 fill_char,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200596 LocaleInfo *locale, int toupper)
597{
598 /* Used to keep track of digits, decimal, and remainder. */
599 Py_ssize_t d_pos = d_start;
Victor Stinner22c103b2013-05-07 23:50:03 +0200600 const unsigned int kind = writer->kind;
Victor Stinnerd3f08822012-05-29 12:57:52 +0200601 const void *data = writer->data;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200602 Py_ssize_t r;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200603
604 if (spec->n_lpadding) {
Victor Stinnerd3f08822012-05-29 12:57:52 +0200605 _PyUnicode_FastFill(writer->buffer,
606 writer->pos, spec->n_lpadding, fill_char);
607 writer->pos += spec->n_lpadding;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200608 }
609 if (spec->n_sign == 1) {
Victor Stinnerd3f08822012-05-29 12:57:52 +0200610 PyUnicode_WRITE(kind, data, writer->pos, spec->sign);
611 writer->pos++;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200612 }
613 if (spec->n_prefix) {
Victor Stinnerd3f08822012-05-29 12:57:52 +0200614 _PyUnicode_FastCopyCharacters(writer->buffer, writer->pos,
615 prefix, p_start,
616 spec->n_prefix);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200617 if (toupper) {
618 Py_ssize_t t;
Benjamin Peterson21e0da22012-01-11 21:00:42 -0500619 for (t = 0; t < spec->n_prefix; t++) {
Victor Stinnerd3f08822012-05-29 12:57:52 +0200620 Py_UCS4 c = PyUnicode_READ(kind, data, writer->pos + t);
Victor Stinnered277852012-02-01 00:22:23 +0100621 c = Py_TOUPPER(c);
Victor Stinnera4ac6002012-01-21 15:50:49 +0100622 assert (c <= 127);
Victor Stinnerd3f08822012-05-29 12:57:52 +0200623 PyUnicode_WRITE(kind, data, writer->pos + t, c);
Benjamin Peterson21e0da22012-01-11 21:00:42 -0500624 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200625 }
Victor Stinnerd3f08822012-05-29 12:57:52 +0200626 writer->pos += spec->n_prefix;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200627 }
628 if (spec->n_spadding) {
Victor Stinnerd3f08822012-05-29 12:57:52 +0200629 _PyUnicode_FastFill(writer->buffer,
630 writer->pos, spec->n_spadding, fill_char);
631 writer->pos += spec->n_spadding;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200632 }
633
634 /* Only for type 'c' special case, it has no digits. */
635 if (spec->n_digits != 0) {
636 /* Fill the digits with InsertThousandsGrouping. */
Victor Stinnerdba2dee2011-09-28 21:50:42 +0200637 char *pdigits;
638 if (PyUnicode_READY(digits))
639 return -1;
640 pdigits = PyUnicode_DATA(digits);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200641 if (PyUnicode_KIND(digits) < kind) {
642 pdigits = _PyUnicode_AsKind(digits, kind);
Victor Stinnerafbaa202011-09-28 21:50:16 +0200643 if (pdigits == NULL)
644 return -1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200645 }
Victor Stinner90f50d42012-02-24 01:44:47 +0100646 r = _PyUnicode_InsertThousandsGrouping(
Victor Stinnerd3f08822012-05-29 12:57:52 +0200647 writer->buffer, writer->pos,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200648 spec->n_grouped_digits,
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200649 pdigits + kind * d_pos,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200650 spec->n_digits, spec->n_min_width,
Victor Stinner41a863c2012-02-24 00:37:51 +0100651 locale->grouping, locale->thousands_sep, NULL);
Victor Stinner90f50d42012-02-24 01:44:47 +0100652 if (r == -1)
653 return -1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200654 assert(r == spec->n_grouped_digits);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200655 if (PyUnicode_KIND(digits) < kind)
656 PyMem_Free(pdigits);
657 d_pos += spec->n_digits;
658 }
659 if (toupper) {
660 Py_ssize_t t;
Benjamin Peterson21e0da22012-01-11 21:00:42 -0500661 for (t = 0; t < spec->n_grouped_digits; t++) {
Victor Stinnerd3f08822012-05-29 12:57:52 +0200662 Py_UCS4 c = PyUnicode_READ(kind, data, writer->pos + t);
Victor Stinnered277852012-02-01 00:22:23 +0100663 c = Py_TOUPPER(c);
Benjamin Peterson21e0da22012-01-11 21:00:42 -0500664 if (c > 127) {
665 PyErr_SetString(PyExc_SystemError, "non-ascii grouped digit");
666 return -1;
667 }
Victor Stinnerd3f08822012-05-29 12:57:52 +0200668 PyUnicode_WRITE(kind, data, writer->pos + t, c);
Benjamin Peterson21e0da22012-01-11 21:00:42 -0500669 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200670 }
Victor Stinnerd3f08822012-05-29 12:57:52 +0200671 writer->pos += spec->n_grouped_digits;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200672
673 if (spec->n_decimal) {
Victor Stinnerd3f08822012-05-29 12:57:52 +0200674 _PyUnicode_FastCopyCharacters(
675 writer->buffer, writer->pos,
676 locale->decimal_point, 0, spec->n_decimal);
677 writer->pos += spec->n_decimal;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200678 d_pos += 1;
679 }
680
681 if (spec->n_remainder) {
Victor Stinnerd3f08822012-05-29 12:57:52 +0200682 _PyUnicode_FastCopyCharacters(
683 writer->buffer, writer->pos,
684 digits, d_pos, spec->n_remainder);
685 writer->pos += spec->n_remainder;
Brett Cannon8a250fa2012-06-25 16:13:44 -0400686 /* d_pos += spec->n_remainder; */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200687 }
688
689 if (spec->n_rpadding) {
Victor Stinnerd3f08822012-05-29 12:57:52 +0200690 _PyUnicode_FastFill(writer->buffer,
691 writer->pos, spec->n_rpadding,
692 fill_char);
693 writer->pos += spec->n_rpadding;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200694 }
Victor Stinnerafbaa202011-09-28 21:50:16 +0200695 return 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200696}
697
Serhiy Storchaka2d06e842015-12-25 19:53:18 +0200698static const char no_grouping[1] = {CHAR_MAX};
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200699
700/* Find the decimal point character(s?), thousands_separator(s?), and
701 grouping description, either for the current locale if type is
Eric V. Smith89e1b1a2016-09-09 23:06:47 -0400702 LT_CURRENT_LOCALE, a hard-coded locale if LT_DEFAULT_LOCALE or
703 LT_UNDERSCORE_LOCALE/LT_UNDER_FOUR_LOCALE, or none if LT_NO_LOCALE. */
Victor Stinner41a863c2012-02-24 00:37:51 +0100704static int
Benjamin Peterson59e5e0d2016-09-13 22:43:45 -0700705get_locale_info(enum LocaleType type, LocaleInfo *locale_info)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200706{
707 switch (type) {
708 case LT_CURRENT_LOCALE: {
Victor Stinner6eff6b82018-11-20 22:06:21 +0100709 const char *grouping;
Victor Stinnercb064fc2018-01-15 15:58:02 +0100710 if (_Py_GetLocaleconvNumeric(&locale_info->decimal_point,
711 &locale_info->thousands_sep,
Victor Stinner6eff6b82018-11-20 22:06:21 +0100712 &grouping) < 0) {
Victor Stinner41a863c2012-02-24 00:37:51 +0100713 return -1;
Victor Stinnercb064fc2018-01-15 15:58:02 +0100714 }
Victor Stinner6eff6b82018-11-20 22:06:21 +0100715
716 /* localeconv() grouping can become a dangling pointer or point
717 to a different string if another thread calls localeconv() during
718 the string formatting. Copy the string to avoid this risk. */
719 locale_info->grouping_buffer = _PyMem_Strdup(grouping);
720 if (locale_info->grouping_buffer == NULL) {
721 PyErr_NoMemory();
722 return -1;
723 }
724 locale_info->grouping = locale_info->grouping_buffer;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200725 break;
726 }
727 case LT_DEFAULT_LOCALE:
Eric V. Smith89e1b1a2016-09-09 23:06:47 -0400728 case LT_UNDERSCORE_LOCALE:
729 case LT_UNDER_FOUR_LOCALE:
Victor Stinner41a863c2012-02-24 00:37:51 +0100730 locale_info->decimal_point = PyUnicode_FromOrdinal('.');
Eric V. Smith89e1b1a2016-09-09 23:06:47 -0400731 locale_info->thousands_sep = PyUnicode_FromOrdinal(
732 type == LT_DEFAULT_LOCALE ? ',' : '_');
Benjamin Peterson59e5e0d2016-09-13 22:43:45 -0700733 if (!locale_info->decimal_point || !locale_info->thousands_sep)
Victor Stinner41a863c2012-02-24 00:37:51 +0100734 return -1;
Eric V. Smith89e1b1a2016-09-09 23:06:47 -0400735 if (type != LT_UNDER_FOUR_LOCALE)
736 locale_info->grouping = "\3"; /* Group every 3 characters. The
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200737 (implicit) trailing 0 means repeat
738 infinitely. */
Eric V. Smith89e1b1a2016-09-09 23:06:47 -0400739 else
740 locale_info->grouping = "\4"; /* Bin/oct/hex group every four. */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200741 break;
742 case LT_NO_LOCALE:
Victor Stinner41a863c2012-02-24 00:37:51 +0100743 locale_info->decimal_point = PyUnicode_FromOrdinal('.');
744 locale_info->thousands_sep = PyUnicode_New(0, 0);
Benjamin Peterson59e5e0d2016-09-13 22:43:45 -0700745 if (!locale_info->decimal_point || !locale_info->thousands_sep)
Victor Stinner41a863c2012-02-24 00:37:51 +0100746 return -1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200747 locale_info->grouping = no_grouping;
748 break;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200749 }
Victor Stinner41a863c2012-02-24 00:37:51 +0100750 return 0;
751}
752
753static void
754free_locale_info(LocaleInfo *locale_info)
755{
756 Py_XDECREF(locale_info->decimal_point);
757 Py_XDECREF(locale_info->thousands_sep);
Victor Stinner6eff6b82018-11-20 22:06:21 +0100758 PyMem_Free(locale_info->grouping_buffer);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200759}
760
761/************************************************************************/
762/*********** string formatting ******************************************/
763/************************************************************************/
764
Victor Stinnerd3f08822012-05-29 12:57:52 +0200765static int
766format_string_internal(PyObject *value, const InternalFormatSpec *format,
767 _PyUnicodeWriter *writer)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200768{
769 Py_ssize_t lpad;
770 Py_ssize_t rpad;
771 Py_ssize_t total;
Victor Stinnerd3f08822012-05-29 12:57:52 +0200772 Py_ssize_t len;
773 int result = -1;
Victor Stinnerece58de2012-04-23 23:36:38 +0200774 Py_UCS4 maxchar;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200775
Victor Stinnerd3f08822012-05-29 12:57:52 +0200776 assert(PyUnicode_IS_READY(value));
777 len = PyUnicode_GET_LENGTH(value);
778
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200779 /* sign is not allowed on strings */
780 if (format->sign != '\0') {
781 PyErr_SetString(PyExc_ValueError,
782 "Sign not allowed in string format specifier");
783 goto done;
784 }
785
786 /* alternate is not allowed on strings */
787 if (format->alternate) {
788 PyErr_SetString(PyExc_ValueError,
789 "Alternate form (#) not allowed in string format "
790 "specifier");
791 goto done;
792 }
793
794 /* '=' alignment not allowed on strings */
795 if (format->align == '=') {
796 PyErr_SetString(PyExc_ValueError,
797 "'=' alignment not allowed "
798 "in string format specifier");
799 goto done;
800 }
801
Victor Stinner621ef3d2012-10-02 00:33:47 +0200802 if ((format->width == -1 || format->width <= len)
803 && (format->precision == -1 || format->precision >= len)) {
Victor Stinnerd3f08822012-05-29 12:57:52 +0200804 /* Fast path */
805 return _PyUnicodeWriter_WriteStr(writer, value);
806 }
807
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200808 /* if precision is specified, output no more that format.precision
809 characters */
810 if (format->precision >= 0 && len >= format->precision) {
811 len = format->precision;
812 }
813
814 calc_padding(len, format->width, format->align, &lpad, &rpad, &total);
815
Victor Stinnereb4b5ac2013-04-03 02:02:33 +0200816 maxchar = writer->maxchar;
Victor Stinnera4ac6002012-01-21 15:50:49 +0100817 if (lpad != 0 || rpad != 0)
818 maxchar = Py_MAX(maxchar, format->fill_char);
Victor Stinnereb4b5ac2013-04-03 02:02:33 +0200819 if (PyUnicode_MAX_CHAR_VALUE(value) > maxchar) {
820 Py_UCS4 valmaxchar = _PyUnicode_FindMaxChar(value, 0, len);
821 maxchar = Py_MAX(maxchar, valmaxchar);
822 }
Victor Stinnera4ac6002012-01-21 15:50:49 +0100823
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200824 /* allocate the resulting string */
Victor Stinnerd3f08822012-05-29 12:57:52 +0200825 if (_PyUnicodeWriter_Prepare(writer, total, maxchar) == -1)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200826 goto done;
827
828 /* Write into that space. First the padding. */
Eric V. Smith2ea97122014-04-14 11:55:10 -0400829 result = fill_padding(writer, len, format->fill_char, lpad, rpad);
Victor Stinnerd3f08822012-05-29 12:57:52 +0200830 if (result == -1)
831 goto done;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200832
833 /* Then the source string. */
Victor Stinnerc9d369f2012-06-16 02:22:37 +0200834 if (len) {
835 _PyUnicode_FastCopyCharacters(writer->buffer, writer->pos,
836 value, 0, len);
837 }
Victor Stinnerd3f08822012-05-29 12:57:52 +0200838 writer->pos += (len + rpad);
839 result = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200840
841done:
842 return result;
843}
844
845
846/************************************************************************/
847/*********** long formatting ********************************************/
848/************************************************************************/
849
Victor Stinnerd3f08822012-05-29 12:57:52 +0200850static int
851format_long_internal(PyObject *value, const InternalFormatSpec *format,
852 _PyUnicodeWriter *writer)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200853{
Victor Stinnerd3f08822012-05-29 12:57:52 +0200854 int result = -1;
Amaury Forgeot d'Arccd27df32012-01-23 22:42:19 +0100855 Py_UCS4 maxchar = 127;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200856 PyObject *tmp = NULL;
857 Py_ssize_t inumeric_chars;
858 Py_UCS4 sign_char = '\0';
859 Py_ssize_t n_digits; /* count of digits need from the computed
860 string */
861 Py_ssize_t n_remainder = 0; /* Used only for 'c' formatting, which
862 produces non-digits */
863 Py_ssize_t n_prefix = 0; /* Count of prefix chars, (e.g., '0x') */
864 Py_ssize_t n_total;
Victor Stinnered277852012-02-01 00:22:23 +0100865 Py_ssize_t prefix = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200866 NumberFieldWidths spec;
867 long x;
868
869 /* Locale settings, either from the actual locale or
870 from a hard-code pseudo-locale */
Victor Stinner41a863c2012-02-24 00:37:51 +0100871 LocaleInfo locale = STATIC_LOCALE_INFO_INIT;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200872
873 /* no precision allowed on integers */
874 if (format->precision != -1) {
875 PyErr_SetString(PyExc_ValueError,
876 "Precision not allowed in integer format specifier");
877 goto done;
878 }
879
880 /* special case for character formatting */
881 if (format->type == 'c') {
882 /* error to specify a sign */
883 if (format->sign != '\0') {
884 PyErr_SetString(PyExc_ValueError,
885 "Sign not allowed with integer"
886 " format specifier 'c'");
887 goto done;
888 }
Eric V. Smitha12572f2014-04-15 22:37:55 -0400889 /* error to request alternate format */
890 if (format->alternate) {
891 PyErr_SetString(PyExc_ValueError,
892 "Alternate form (#) not allowed with integer"
893 " format specifier 'c'");
894 goto done;
895 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200896
897 /* taken from unicodeobject.c formatchar() */
898 /* Integer input truncated to a character */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200899 x = PyLong_AsLong(value);
900 if (x == -1 && PyErr_Occurred())
901 goto done;
902 if (x < 0 || x > 0x10ffff) {
903 PyErr_SetString(PyExc_OverflowError,
Victor Stinnera4ac6002012-01-21 15:50:49 +0100904 "%c arg not in range(0x110000)");
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200905 goto done;
906 }
907 tmp = PyUnicode_FromOrdinal(x);
908 inumeric_chars = 0;
909 n_digits = 1;
Amaury Forgeot d'Arc6d766fc2012-01-23 23:20:43 +0100910 maxchar = Py_MAX(maxchar, (Py_UCS4)x);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200911
912 /* As a sort-of hack, we tell calc_number_widths that we only
913 have "remainder" characters. calc_number_widths thinks
914 these are characters that don't get formatted, only copied
915 into the output string. We do this for 'c' formatting,
916 because the characters are likely to be non-digits. */
917 n_remainder = 1;
918 }
919 else {
920 int base;
921 int leading_chars_to_skip = 0; /* Number of characters added by
922 PyNumber_ToBase that we want to
923 skip over. */
924
925 /* Compute the base and how many characters will be added by
926 PyNumber_ToBase */
927 switch (format->type) {
928 case 'b':
929 base = 2;
930 leading_chars_to_skip = 2; /* 0b */
931 break;
932 case 'o':
933 base = 8;
934 leading_chars_to_skip = 2; /* 0o */
935 break;
936 case 'x':
937 case 'X':
938 base = 16;
939 leading_chars_to_skip = 2; /* 0x */
940 break;
941 default: /* shouldn't be needed, but stops a compiler warning */
942 case 'd':
943 case 'n':
944 base = 10;
945 break;
946 }
947
Victor Stinnerd3f08822012-05-29 12:57:52 +0200948 if (format->sign != '+' && format->sign != ' '
949 && format->width == -1
950 && format->type != 'X' && format->type != 'n'
951 && !format->thousands_separators
952 && PyLong_CheckExact(value))
953 {
954 /* Fast path */
955 return _PyLong_FormatWriter(writer, value, base, format->alternate);
956 }
957
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200958 /* The number of prefix chars is the same as the leading
959 chars to skip */
960 if (format->alternate)
961 n_prefix = leading_chars_to_skip;
962
963 /* Do the hard part, converting to a string in a given base */
Victor Stinnerd3f08822012-05-29 12:57:52 +0200964 tmp = _PyLong_Format(value, base);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200965 if (tmp == NULL || PyUnicode_READY(tmp) == -1)
966 goto done;
967
968 inumeric_chars = 0;
969 n_digits = PyUnicode_GET_LENGTH(tmp);
970
971 prefix = inumeric_chars;
972
973 /* Is a sign character present in the output? If so, remember it
974 and skip it */
975 if (PyUnicode_READ_CHAR(tmp, inumeric_chars) == '-') {
976 sign_char = '-';
977 ++prefix;
978 ++leading_chars_to_skip;
979 }
980
981 /* Skip over the leading chars (0x, 0b, etc.) */
982 n_digits -= leading_chars_to_skip;
983 inumeric_chars += leading_chars_to_skip;
984 }
985
986 /* Determine the grouping, separator, and decimal point, if any. */
Victor Stinner41a863c2012-02-24 00:37:51 +0100987 if (get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
Eric V. Smith89e1b1a2016-09-09 23:06:47 -0400988 format->thousands_separators,
Victor Stinner41a863c2012-02-24 00:37:51 +0100989 &locale) == -1)
990 goto done;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200991
992 /* Calculate how much memory we'll need. */
993 n_total = calc_number_widths(&spec, n_prefix, sign_char, tmp, inumeric_chars,
Victor Stinner41a863c2012-02-24 00:37:51 +0100994 inumeric_chars + n_digits, n_remainder, 0,
995 &locale, format, &maxchar);
Victor Stinnera4ac6002012-01-21 15:50:49 +0100996
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200997 /* Allocate the memory. */
Victor Stinnerd3f08822012-05-29 12:57:52 +0200998 if (_PyUnicodeWriter_Prepare(writer, n_total, maxchar) == -1)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200999 goto done;
1000
1001 /* Populate the memory. */
Victor Stinnerd3f08822012-05-29 12:57:52 +02001002 result = fill_number(writer, &spec,
1003 tmp, inumeric_chars, inumeric_chars + n_digits,
Eric V. Smith2ea97122014-04-14 11:55:10 -04001004 tmp, prefix, format->fill_char,
Victor Stinnerd3f08822012-05-29 12:57:52 +02001005 &locale, format->type == 'X');
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001006
1007done:
1008 Py_XDECREF(tmp);
Victor Stinner41a863c2012-02-24 00:37:51 +01001009 free_locale_info(&locale);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001010 return result;
1011}
1012
1013/************************************************************************/
1014/*********** float formatting *******************************************/
1015/************************************************************************/
1016
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001017/* much of this is taken from unicodeobject.c */
Victor Stinnerd3f08822012-05-29 12:57:52 +02001018static int
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001019format_float_internal(PyObject *value,
Victor Stinnerd3f08822012-05-29 12:57:52 +02001020 const InternalFormatSpec *format,
1021 _PyUnicodeWriter *writer)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001022{
1023 char *buf = NULL; /* buffer returned from PyOS_double_to_string */
1024 Py_ssize_t n_digits;
1025 Py_ssize_t n_remainder;
1026 Py_ssize_t n_total;
1027 int has_decimal;
1028 double val;
Victor Stinner76d38502013-06-24 23:34:15 +02001029 int precision, default_precision = 6;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001030 Py_UCS4 type = format->type;
1031 int add_pct = 0;
1032 Py_ssize_t index;
1033 NumberFieldWidths spec;
1034 int flags = 0;
Victor Stinnerd3f08822012-05-29 12:57:52 +02001035 int result = -1;
Amaury Forgeot d'Arccd27df32012-01-23 22:42:19 +01001036 Py_UCS4 maxchar = 127;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001037 Py_UCS4 sign_char = '\0';
1038 int float_type; /* Used to see if we have a nan, inf, or regular float. */
1039 PyObject *unicode_tmp = NULL;
1040
1041 /* Locale settings, either from the actual locale or
1042 from a hard-code pseudo-locale */
Victor Stinner41a863c2012-02-24 00:37:51 +01001043 LocaleInfo locale = STATIC_LOCALE_INFO_INIT;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001044
Victor Stinner2f084ec2013-06-23 14:54:30 +02001045 if (format->precision > INT_MAX) {
1046 PyErr_SetString(PyExc_ValueError, "precision too big");
1047 goto done;
1048 }
1049 precision = (int)format->precision;
1050
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001051 if (format->alternate)
1052 flags |= Py_DTSF_ALT;
1053
1054 if (type == '\0') {
1055 /* Omitted type specifier. Behaves in the same way as repr(x)
1056 and str(x) if no precision is given, else like 'g', but with
1057 at least one digit after the decimal point. */
1058 flags |= Py_DTSF_ADD_DOT_0;
1059 type = 'r';
1060 default_precision = 0;
1061 }
1062
1063 if (type == 'n')
1064 /* 'n' is the same as 'g', except for the locale used to
1065 format the result. We take care of that later. */
1066 type = 'g';
1067
1068 val = PyFloat_AsDouble(value);
1069 if (val == -1.0 && PyErr_Occurred())
1070 goto done;
1071
1072 if (type == '%') {
1073 type = 'f';
1074 val *= 100;
1075 add_pct = 1;
1076 }
1077
1078 if (precision < 0)
1079 precision = default_precision;
1080 else if (type == 'r')
1081 type = 'g';
1082
Martin Panter4c359642016-05-08 13:53:41 +00001083 /* Cast "type", because if we're in unicode we need to pass an
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001084 8-bit char. This is safe, because we've restricted what "type"
1085 can be. */
1086 buf = PyOS_double_to_string(val, (char)type, precision, flags,
1087 &float_type);
1088 if (buf == NULL)
1089 goto done;
1090 n_digits = strlen(buf);
1091
1092 if (add_pct) {
1093 /* We know that buf has a trailing zero (since we just called
1094 strlen() on it), and we don't use that fact any more. So we
1095 can just write over the trailing zero. */
1096 buf[n_digits] = '%';
1097 n_digits += 1;
1098 }
1099
Victor Stinnerd3f08822012-05-29 12:57:52 +02001100 if (format->sign != '+' && format->sign != ' '
1101 && format->width == -1
1102 && format->type != 'n'
1103 && !format->thousands_separators)
1104 {
1105 /* Fast path */
Victor Stinner4a587072013-11-19 12:54:53 +01001106 result = _PyUnicodeWriter_WriteASCIIString(writer, buf, n_digits);
1107 PyMem_Free(buf);
Victor Stinnerd3f08822012-05-29 12:57:52 +02001108 return result;
1109 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001110
Victor Stinner4a587072013-11-19 12:54:53 +01001111 /* Since there is no unicode version of PyOS_double_to_string,
1112 just use the 8 bit version and then convert to unicode. */
1113 unicode_tmp = _PyUnicode_FromASCII(buf, n_digits);
1114 PyMem_Free(buf);
1115 if (unicode_tmp == NULL)
1116 goto done;
1117
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001118 /* Is a sign character present in the output? If so, remember it
1119 and skip it */
Victor Stinnerd3f08822012-05-29 12:57:52 +02001120 index = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001121 if (PyUnicode_READ_CHAR(unicode_tmp, index) == '-') {
1122 sign_char = '-';
1123 ++index;
1124 --n_digits;
1125 }
1126
1127 /* Determine if we have any "remainder" (after the digits, might include
1128 decimal or exponent or both (or neither)) */
1129 parse_number(unicode_tmp, index, index + n_digits, &n_remainder, &has_decimal);
1130
1131 /* Determine the grouping, separator, and decimal point, if any. */
Victor Stinner41a863c2012-02-24 00:37:51 +01001132 if (get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
Eric V. Smith89e1b1a2016-09-09 23:06:47 -04001133 format->thousands_separators,
Victor Stinner41a863c2012-02-24 00:37:51 +01001134 &locale) == -1)
1135 goto done;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001136
1137 /* Calculate how much memory we'll need. */
Victor Stinnerafbaa202011-09-28 21:50:16 +02001138 n_total = calc_number_widths(&spec, 0, sign_char, unicode_tmp, index,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001139 index + n_digits, n_remainder, has_decimal,
Victor Stinner41a863c2012-02-24 00:37:51 +01001140 &locale, format, &maxchar);
Victor Stinnera4ac6002012-01-21 15:50:49 +01001141
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001142 /* Allocate the memory. */
Victor Stinnerd3f08822012-05-29 12:57:52 +02001143 if (_PyUnicodeWriter_Prepare(writer, n_total, maxchar) == -1)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001144 goto done;
1145
1146 /* Populate the memory. */
Victor Stinnerd3f08822012-05-29 12:57:52 +02001147 result = fill_number(writer, &spec,
1148 unicode_tmp, index, index + n_digits,
Eric V. Smith2ea97122014-04-14 11:55:10 -04001149 NULL, 0, format->fill_char,
Victor Stinnerd3f08822012-05-29 12:57:52 +02001150 &locale, 0);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001151
1152done:
Stefan Krahd9c1bf72012-09-06 13:02:46 +02001153 Py_XDECREF(unicode_tmp);
Victor Stinner41a863c2012-02-24 00:37:51 +01001154 free_locale_info(&locale);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001155 return result;
1156}
1157
1158/************************************************************************/
1159/*********** complex formatting *****************************************/
1160/************************************************************************/
1161
Victor Stinnerd3f08822012-05-29 12:57:52 +02001162static int
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001163format_complex_internal(PyObject *value,
Victor Stinnerd3f08822012-05-29 12:57:52 +02001164 const InternalFormatSpec *format,
1165 _PyUnicodeWriter *writer)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001166{
1167 double re;
1168 double im;
1169 char *re_buf = NULL; /* buffer returned from PyOS_double_to_string */
1170 char *im_buf = NULL; /* buffer returned from PyOS_double_to_string */
1171
1172 InternalFormatSpec tmp_format = *format;
1173 Py_ssize_t n_re_digits;
1174 Py_ssize_t n_im_digits;
1175 Py_ssize_t n_re_remainder;
1176 Py_ssize_t n_im_remainder;
1177 Py_ssize_t n_re_total;
1178 Py_ssize_t n_im_total;
1179 int re_has_decimal;
1180 int im_has_decimal;
Victor Stinner76d38502013-06-24 23:34:15 +02001181 int precision, default_precision = 6;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001182 Py_UCS4 type = format->type;
1183 Py_ssize_t i_re;
1184 Py_ssize_t i_im;
1185 NumberFieldWidths re_spec;
1186 NumberFieldWidths im_spec;
1187 int flags = 0;
Victor Stinnerd3f08822012-05-29 12:57:52 +02001188 int result = -1;
Amaury Forgeot d'Arccd27df32012-01-23 22:42:19 +01001189 Py_UCS4 maxchar = 127;
Victor Stinnerd3f08822012-05-29 12:57:52 +02001190 enum PyUnicode_Kind rkind;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001191 void *rdata;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001192 Py_UCS4 re_sign_char = '\0';
1193 Py_UCS4 im_sign_char = '\0';
1194 int re_float_type; /* Used to see if we have a nan, inf, or regular float. */
1195 int im_float_type;
1196 int add_parens = 0;
1197 int skip_re = 0;
1198 Py_ssize_t lpad;
1199 Py_ssize_t rpad;
1200 Py_ssize_t total;
1201 PyObject *re_unicode_tmp = NULL;
1202 PyObject *im_unicode_tmp = NULL;
1203
1204 /* Locale settings, either from the actual locale or
1205 from a hard-code pseudo-locale */
Victor Stinner41a863c2012-02-24 00:37:51 +01001206 LocaleInfo locale = STATIC_LOCALE_INFO_INIT;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001207
Victor Stinner2f084ec2013-06-23 14:54:30 +02001208 if (format->precision > INT_MAX) {
1209 PyErr_SetString(PyExc_ValueError, "precision too big");
1210 goto done;
1211 }
1212 precision = (int)format->precision;
1213
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001214 /* Zero padding is not allowed. */
1215 if (format->fill_char == '0') {
1216 PyErr_SetString(PyExc_ValueError,
1217 "Zero padding is not allowed in complex format "
1218 "specifier");
1219 goto done;
1220 }
1221
1222 /* Neither is '=' alignment . */
1223 if (format->align == '=') {
1224 PyErr_SetString(PyExc_ValueError,
1225 "'=' alignment flag is not allowed in complex format "
1226 "specifier");
1227 goto done;
1228 }
1229
1230 re = PyComplex_RealAsDouble(value);
1231 if (re == -1.0 && PyErr_Occurred())
1232 goto done;
1233 im = PyComplex_ImagAsDouble(value);
1234 if (im == -1.0 && PyErr_Occurred())
1235 goto done;
1236
1237 if (format->alternate)
1238 flags |= Py_DTSF_ALT;
1239
1240 if (type == '\0') {
1241 /* Omitted type specifier. Should be like str(self). */
1242 type = 'r';
1243 default_precision = 0;
1244 if (re == 0.0 && copysign(1.0, re) == 1.0)
1245 skip_re = 1;
1246 else
1247 add_parens = 1;
1248 }
1249
1250 if (type == 'n')
1251 /* 'n' is the same as 'g', except for the locale used to
1252 format the result. We take care of that later. */
1253 type = 'g';
1254
1255 if (precision < 0)
1256 precision = default_precision;
1257 else if (type == 'r')
1258 type = 'g';
1259
Martin Panter4c359642016-05-08 13:53:41 +00001260 /* Cast "type", because if we're in unicode we need to pass an
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001261 8-bit char. This is safe, because we've restricted what "type"
1262 can be. */
1263 re_buf = PyOS_double_to_string(re, (char)type, precision, flags,
1264 &re_float_type);
1265 if (re_buf == NULL)
1266 goto done;
1267 im_buf = PyOS_double_to_string(im, (char)type, precision, flags,
1268 &im_float_type);
1269 if (im_buf == NULL)
1270 goto done;
1271
1272 n_re_digits = strlen(re_buf);
1273 n_im_digits = strlen(im_buf);
1274
1275 /* Since there is no unicode version of PyOS_double_to_string,
1276 just use the 8 bit version and then convert to unicode. */
Victor Stinnerd3f08822012-05-29 12:57:52 +02001277 re_unicode_tmp = _PyUnicode_FromASCII(re_buf, n_re_digits);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001278 if (re_unicode_tmp == NULL)
1279 goto done;
1280 i_re = 0;
1281
Victor Stinnerd3f08822012-05-29 12:57:52 +02001282 im_unicode_tmp = _PyUnicode_FromASCII(im_buf, n_im_digits);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001283 if (im_unicode_tmp == NULL)
1284 goto done;
1285 i_im = 0;
1286
1287 /* Is a sign character present in the output? If so, remember it
1288 and skip it */
1289 if (PyUnicode_READ_CHAR(re_unicode_tmp, i_re) == '-') {
1290 re_sign_char = '-';
1291 ++i_re;
1292 --n_re_digits;
1293 }
1294 if (PyUnicode_READ_CHAR(im_unicode_tmp, i_im) == '-') {
1295 im_sign_char = '-';
1296 ++i_im;
1297 --n_im_digits;
1298 }
1299
1300 /* Determine if we have any "remainder" (after the digits, might include
1301 decimal or exponent or both (or neither)) */
Victor Stinnerafbaa202011-09-28 21:50:16 +02001302 parse_number(re_unicode_tmp, i_re, i_re + n_re_digits,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001303 &n_re_remainder, &re_has_decimal);
Victor Stinnerafbaa202011-09-28 21:50:16 +02001304 parse_number(im_unicode_tmp, i_im, i_im + n_im_digits,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001305 &n_im_remainder, &im_has_decimal);
1306
1307 /* Determine the grouping, separator, and decimal point, if any. */
Victor Stinner41a863c2012-02-24 00:37:51 +01001308 if (get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
Eric V. Smith89e1b1a2016-09-09 23:06:47 -04001309 format->thousands_separators,
Victor Stinner41a863c2012-02-24 00:37:51 +01001310 &locale) == -1)
1311 goto done;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001312
1313 /* Turn off any padding. We'll do it later after we've composed
1314 the numbers without padding. */
1315 tmp_format.fill_char = '\0';
1316 tmp_format.align = '<';
1317 tmp_format.width = -1;
1318
1319 /* Calculate how much memory we'll need. */
1320 n_re_total = calc_number_widths(&re_spec, 0, re_sign_char, re_unicode_tmp,
1321 i_re, i_re + n_re_digits, n_re_remainder,
Victor Stinner41a863c2012-02-24 00:37:51 +01001322 re_has_decimal, &locale, &tmp_format,
1323 &maxchar);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001324
1325 /* Same formatting, but always include a sign, unless the real part is
1326 * going to be omitted, in which case we use whatever sign convention was
1327 * requested by the original format. */
1328 if (!skip_re)
1329 tmp_format.sign = '+';
1330 n_im_total = calc_number_widths(&im_spec, 0, im_sign_char, im_unicode_tmp,
1331 i_im, i_im + n_im_digits, n_im_remainder,
Victor Stinner41a863c2012-02-24 00:37:51 +01001332 im_has_decimal, &locale, &tmp_format,
1333 &maxchar);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001334
1335 if (skip_re)
1336 n_re_total = 0;
1337
1338 /* Add 1 for the 'j', and optionally 2 for parens. */
1339 calc_padding(n_re_total + n_im_total + 1 + add_parens * 2,
1340 format->width, format->align, &lpad, &rpad, &total);
1341
Victor Stinner41a863c2012-02-24 00:37:51 +01001342 if (lpad || rpad)
Victor Stinnera4ac6002012-01-21 15:50:49 +01001343 maxchar = Py_MAX(maxchar, format->fill_char);
1344
Victor Stinnerd3f08822012-05-29 12:57:52 +02001345 if (_PyUnicodeWriter_Prepare(writer, total, maxchar) == -1)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001346 goto done;
Victor Stinnerd3f08822012-05-29 12:57:52 +02001347 rkind = writer->kind;
1348 rdata = writer->data;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001349
1350 /* Populate the memory. First, the padding. */
Victor Stinnerd3f08822012-05-29 12:57:52 +02001351 result = fill_padding(writer,
1352 n_re_total + n_im_total + 1 + add_parens * 2,
Eric V. Smith2ea97122014-04-14 11:55:10 -04001353 format->fill_char, lpad, rpad);
Victor Stinnerd3f08822012-05-29 12:57:52 +02001354 if (result == -1)
1355 goto done;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001356
Victor Stinnerd3f08822012-05-29 12:57:52 +02001357 if (add_parens) {
1358 PyUnicode_WRITE(rkind, rdata, writer->pos, '(');
1359 writer->pos++;
1360 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001361
1362 if (!skip_re) {
Victor Stinnerd3f08822012-05-29 12:57:52 +02001363 result = fill_number(writer, &re_spec,
1364 re_unicode_tmp, i_re, i_re + n_re_digits,
1365 NULL, 0,
1366 0,
1367 &locale, 0);
1368 if (result == -1)
Victor Stinnerafbaa202011-09-28 21:50:16 +02001369 goto done;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001370 }
Victor Stinnerd3f08822012-05-29 12:57:52 +02001371 result = fill_number(writer, &im_spec,
1372 im_unicode_tmp, i_im, i_im + n_im_digits,
1373 NULL, 0,
1374 0,
1375 &locale, 0);
1376 if (result == -1)
Victor Stinnerafbaa202011-09-28 21:50:16 +02001377 goto done;
Victor Stinnerd3f08822012-05-29 12:57:52 +02001378 PyUnicode_WRITE(rkind, rdata, writer->pos, 'j');
1379 writer->pos++;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001380
Victor Stinnerd3f08822012-05-29 12:57:52 +02001381 if (add_parens) {
1382 PyUnicode_WRITE(rkind, rdata, writer->pos, ')');
1383 writer->pos++;
1384 }
1385
1386 writer->pos += rpad;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001387
1388done:
1389 PyMem_Free(re_buf);
1390 PyMem_Free(im_buf);
1391 Py_XDECREF(re_unicode_tmp);
1392 Py_XDECREF(im_unicode_tmp);
Victor Stinner41a863c2012-02-24 00:37:51 +01001393 free_locale_info(&locale);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001394 return result;
1395}
1396
1397/************************************************************************/
1398/*********** built in formatters ****************************************/
1399/************************************************************************/
doko@ubuntu.com39378f72012-06-21 12:12:20 +02001400static int
Victor Stinnerd3f08822012-05-29 12:57:52 +02001401format_obj(PyObject *obj, _PyUnicodeWriter *writer)
1402{
1403 PyObject *str;
1404 int err;
1405
1406 str = PyObject_Str(obj);
1407 if (str == NULL)
1408 return -1;
1409 err = _PyUnicodeWriter_WriteStr(writer, str);
1410 Py_DECREF(str);
1411 return err;
1412}
1413
1414int
1415_PyUnicode_FormatAdvancedWriter(_PyUnicodeWriter *writer,
1416 PyObject *obj,
1417 PyObject *format_spec,
1418 Py_ssize_t start, Py_ssize_t end)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001419{
1420 InternalFormatSpec format;
Victor Stinnerd3f08822012-05-29 12:57:52 +02001421
1422 assert(PyUnicode_Check(obj));
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001423
1424 /* check for the special case of zero length format spec, make
1425 it equivalent to str(obj) */
Victor Stinnerd3f08822012-05-29 12:57:52 +02001426 if (start == end) {
1427 if (PyUnicode_CheckExact(obj))
1428 return _PyUnicodeWriter_WriteStr(writer, obj);
1429 else
1430 return format_obj(obj, writer);
1431 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001432
1433 /* parse the format_spec */
1434 if (!parse_internal_render_format_spec(format_spec, start, end,
1435 &format, 's', '<'))
Victor Stinnerd3f08822012-05-29 12:57:52 +02001436 return -1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001437
1438 /* type conversion? */
1439 switch (format.type) {
1440 case 's':
1441 /* no type conversion needed, already a string. do the formatting */
Victor Stinnerd3f08822012-05-29 12:57:52 +02001442 return format_string_internal(obj, &format, writer);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001443 default:
1444 /* unknown */
1445 unknown_presentation_type(format.type, obj->ob_type->tp_name);
Victor Stinnerd3f08822012-05-29 12:57:52 +02001446 return -1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001447 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001448}
1449
Victor Stinnerd3f08822012-05-29 12:57:52 +02001450int
1451_PyLong_FormatAdvancedWriter(_PyUnicodeWriter *writer,
1452 PyObject *obj,
1453 PyObject *format_spec,
1454 Py_ssize_t start, Py_ssize_t end)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001455{
Victor Stinnerd3f08822012-05-29 12:57:52 +02001456 PyObject *tmp = NULL, *str = NULL;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001457 InternalFormatSpec format;
Victor Stinnerd3f08822012-05-29 12:57:52 +02001458 int result = -1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001459
1460 /* check for the special case of zero length format spec, make
1461 it equivalent to str(obj) */
1462 if (start == end) {
Victor Stinnerd3f08822012-05-29 12:57:52 +02001463 if (PyLong_CheckExact(obj))
1464 return _PyLong_FormatWriter(writer, obj, 10, 0);
1465 else
1466 return format_obj(obj, writer);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001467 }
1468
1469 /* parse the format_spec */
1470 if (!parse_internal_render_format_spec(format_spec, start, end,
1471 &format, 'd', '>'))
1472 goto done;
1473
1474 /* type conversion? */
1475 switch (format.type) {
1476 case 'b':
1477 case 'c':
1478 case 'd':
1479 case 'o':
1480 case 'x':
1481 case 'X':
1482 case 'n':
Serhiy Storchaka95949422013-08-27 19:40:23 +03001483 /* no type conversion needed, already an int. do the formatting */
Victor Stinnerd3f08822012-05-29 12:57:52 +02001484 result = format_long_internal(obj, &format, writer);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001485 break;
1486
1487 case 'e':
1488 case 'E':
1489 case 'f':
1490 case 'F':
1491 case 'g':
1492 case 'G':
1493 case '%':
1494 /* convert to float */
1495 tmp = PyNumber_Float(obj);
1496 if (tmp == NULL)
1497 goto done;
Victor Stinnerd3f08822012-05-29 12:57:52 +02001498 result = format_float_internal(tmp, &format, writer);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001499 break;
1500
1501 default:
1502 /* unknown */
1503 unknown_presentation_type(format.type, obj->ob_type->tp_name);
1504 goto done;
1505 }
1506
1507done:
1508 Py_XDECREF(tmp);
Victor Stinnerd3f08822012-05-29 12:57:52 +02001509 Py_XDECREF(str);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001510 return result;
1511}
1512
Victor Stinnerd3f08822012-05-29 12:57:52 +02001513int
1514_PyFloat_FormatAdvancedWriter(_PyUnicodeWriter *writer,
1515 PyObject *obj,
1516 PyObject *format_spec,
1517 Py_ssize_t start, Py_ssize_t end)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001518{
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001519 InternalFormatSpec format;
1520
1521 /* check for the special case of zero length format spec, make
1522 it equivalent to str(obj) */
Victor Stinnerd3f08822012-05-29 12:57:52 +02001523 if (start == end)
1524 return format_obj(obj, writer);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001525
1526 /* parse the format_spec */
1527 if (!parse_internal_render_format_spec(format_spec, start, end,
1528 &format, '\0', '>'))
Victor Stinnerd3f08822012-05-29 12:57:52 +02001529 return -1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001530
1531 /* type conversion? */
1532 switch (format.type) {
1533 case '\0': /* No format code: like 'g', but with at least one decimal. */
1534 case 'e':
1535 case 'E':
1536 case 'f':
1537 case 'F':
1538 case 'g':
1539 case 'G':
1540 case 'n':
1541 case '%':
1542 /* no conversion, already a float. do the formatting */
Victor Stinnerd3f08822012-05-29 12:57:52 +02001543 return format_float_internal(obj, &format, writer);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001544
1545 default:
1546 /* unknown */
1547 unknown_presentation_type(format.type, obj->ob_type->tp_name);
Victor Stinnerd3f08822012-05-29 12:57:52 +02001548 return -1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001549 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001550}
1551
Victor Stinnerd3f08822012-05-29 12:57:52 +02001552int
1553_PyComplex_FormatAdvancedWriter(_PyUnicodeWriter *writer,
1554 PyObject *obj,
1555 PyObject *format_spec,
1556 Py_ssize_t start, Py_ssize_t end)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001557{
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001558 InternalFormatSpec format;
1559
1560 /* check for the special case of zero length format spec, make
1561 it equivalent to str(obj) */
Victor Stinnerd3f08822012-05-29 12:57:52 +02001562 if (start == end)
1563 return format_obj(obj, writer);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001564
1565 /* parse the format_spec */
1566 if (!parse_internal_render_format_spec(format_spec, start, end,
1567 &format, '\0', '>'))
Victor Stinnerd3f08822012-05-29 12:57:52 +02001568 return -1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001569
1570 /* type conversion? */
1571 switch (format.type) {
1572 case '\0': /* No format code: like 'g', but with at least one decimal. */
1573 case 'e':
1574 case 'E':
1575 case 'f':
1576 case 'F':
1577 case 'g':
1578 case 'G':
1579 case 'n':
1580 /* no conversion, already a complex. do the formatting */
Victor Stinnerd3f08822012-05-29 12:57:52 +02001581 return format_complex_internal(obj, &format, writer);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001582
1583 default:
1584 /* unknown */
1585 unknown_presentation_type(format.type, obj->ob_type->tp_name);
Victor Stinnerd3f08822012-05-29 12:57:52 +02001586 return -1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001587 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001588}