blob: a2c2b3627c9ec74cebdf698962e73f170919c4d6 [file] [log] [blame]
/* implements the unicode (as opposed to string) version of the
   built-in formatters for string, int, float.  that is, the versions
   of int.__format__, etc., that take and return unicode objects */
4
5#include "Python.h"
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02006#include <locale.h>
7
8/* Raises an exception about an unknown presentation type for this
9 * type. */
10
11static void
12unknown_presentation_type(Py_UCS4 presentation_type,
13 const char* type_name)
14{
15 /* %c might be out-of-range, hence the two cases. */
16 if (presentation_type > 32 && presentation_type < 128)
17 PyErr_Format(PyExc_ValueError,
18 "Unknown format code '%c' "
19 "for object of type '%.200s'",
20 (char)presentation_type,
21 type_name);
22 else
23 PyErr_Format(PyExc_ValueError,
24 "Unknown format code '\\x%x' "
25 "for object of type '%.200s'",
26 (unsigned int)presentation_type,
27 type_name);
28}
29
30static void
31invalid_comma_type(Py_UCS4 presentation_type)
32{
33 if (presentation_type > 32 && presentation_type < 128)
34 PyErr_Format(PyExc_ValueError,
Eric V. Smith89e1b1a2016-09-09 23:06:47 -040035 "Cannot specify ',' or '_' with '%c'.",
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020036 (char)presentation_type);
37 else
38 PyErr_Format(PyExc_ValueError,
Eric V. Smith89e1b1a2016-09-09 23:06:47 -040039 "Cannot specify ',' or '_' with '\\x%x'.",
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020040 (unsigned int)presentation_type);
41}
42
Eric V. Smith89e1b1a2016-09-09 23:06:47 -040043static void
Benjamin Petersoneb0dfa92016-09-09 20:14:05 -070044invalid_comma_and_underscore(void)
Eric V. Smith89e1b1a2016-09-09 23:06:47 -040045{
46 PyErr_Format(PyExc_ValueError, "Cannot specify both ',' and '_'.");
47}
48
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020049/*
50 get_integer consumes 0 or more decimal digit characters from an
51 input string, updates *result with the corresponding positive
52 integer, and returns the number of digits consumed.
53
54 returns -1 on error.
55*/
56static int
Serhiy Storchaka1f932612016-08-29 15:57:26 +030057get_integer(PyObject *str, Py_ssize_t *ppos, Py_ssize_t end,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020058 Py_ssize_t *result)
59{
Serhiy Storchaka1f932612016-08-29 15:57:26 +030060 Py_ssize_t accumulator, digitval, pos = *ppos;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020061 int numdigits;
Serhiy Storchaka1f932612016-08-29 15:57:26 +030062 int kind = PyUnicode_KIND(str);
63 void *data = PyUnicode_DATA(str);
64
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020065 accumulator = numdigits = 0;
Serhiy Storchaka1f932612016-08-29 15:57:26 +030066 for (; pos < end; pos++, numdigits++) {
67 digitval = Py_UNICODE_TODECIMAL(PyUnicode_READ(kind, data, pos));
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020068 if (digitval < 0)
69 break;
70 /*
Mark Dickinson47862d42011-12-01 15:27:04 +000071 Detect possible overflow before it happens:
72
73 accumulator * 10 + digitval > PY_SSIZE_T_MAX if and only if
74 accumulator > (PY_SSIZE_T_MAX - digitval) / 10.
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020075 */
Mark Dickinson47862d42011-12-01 15:27:04 +000076 if (accumulator > (PY_SSIZE_T_MAX - digitval) / 10) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020077 PyErr_Format(PyExc_ValueError,
78 "Too many decimal digits in format string");
Serhiy Storchaka1f932612016-08-29 15:57:26 +030079 *ppos = pos;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020080 return -1;
81 }
Mark Dickinson47862d42011-12-01 15:27:04 +000082 accumulator = accumulator * 10 + digitval;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020083 }
Serhiy Storchaka1f932612016-08-29 15:57:26 +030084 *ppos = pos;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020085 *result = accumulator;
86 return numdigits;
87}
88
89/************************************************************************/
90/*********** standard format specifier parsing **************************/
91/************************************************************************/
92
93/* returns true if this character is a specifier alignment token */
94Py_LOCAL_INLINE(int)
95is_alignment_token(Py_UCS4 c)
96{
97 switch (c) {
98 case '<': case '>': case '=': case '^':
99 return 1;
100 default:
101 return 0;
102 }
103}
104
105/* returns true if this character is a sign element */
106Py_LOCAL_INLINE(int)
107is_sign_element(Py_UCS4 c)
108{
109 switch (c) {
110 case ' ': case '+': case '-':
111 return 1;
112 default:
113 return 0;
114 }
115}
Eric Smith8c663262007-08-25 02:26:07 +0000116
/* Locale type codes, selecting how digits are grouped.
   LT_NO_LOCALE must be zero so that the value can be tested as a
   boolean meaning "a thousands separator was requested". */
enum LocaleType {
    LT_NO_LOCALE = 0,           /* no grouping */
    LT_DEFAULT_LOCALE = 1,      /* ',' every three digits */
    LT_UNDERSCORE_LOCALE = 2,   /* '_' every three digits */
    LT_UNDER_FOUR_LOCALE = 3,   /* '_' every four digits (bin/oct/hex) */
    LT_CURRENT_LOCALE = 4       /* whatever localeconv() reports */
};
Eric Smith4a7d76d2008-05-30 18:10:19 +0000125
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200126typedef struct {
127 Py_UCS4 fill_char;
128 Py_UCS4 align;
129 int alternate;
130 Py_UCS4 sign;
131 Py_ssize_t width;
Benjamin Peterson995026a2016-09-13 22:46:15 -0700132 enum LocaleType thousands_separators;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200133 Py_ssize_t precision;
134 Py_UCS4 type;
135} InternalFormatSpec;
Eric Smith4a7d76d2008-05-30 18:10:19 +0000136
#if 0
/* Occasionally useful for debugging: dumps every field of a parsed
   format spec to stdout.  Normally left compiled out. */
static void
DEBUG_PRINT_FORMAT_SPEC(InternalFormatSpec *format)
{
    printf("internal format spec: fill_char %d\n"
           "internal format spec: align %d\n"
           "internal format spec: alternate %d\n"
           "internal format spec: sign %d\n"
           "internal format spec: width %zd\n"
           "internal format spec: thousands_separators %d\n"
           "internal format spec: precision %zd\n"
           "internal format spec: type %c\n"
           "\n",
           format->fill_char,
           format->align,
           format->alternate,
           format->sign,
           format->width,
           format->thousands_separators,
           format->precision,
           format->type);
}
#endif
154
155
156/*
157 ptr points to the start of the format_spec, end points just past its end.
158 fills in format with the parsed information.
159 returns 1 on success, 0 on failure.
160 if failure, sets the exception
161*/
162static int
163parse_internal_render_format_spec(PyObject *format_spec,
164 Py_ssize_t start, Py_ssize_t end,
165 InternalFormatSpec *format,
166 char default_type,
167 char default_align)
168{
169 Py_ssize_t pos = start;
Serhiy Storchaka1f932612016-08-29 15:57:26 +0300170 int kind = PyUnicode_KIND(format_spec);
171 void *data = PyUnicode_DATA(format_spec);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200172 /* end-pos is used throughout this code to specify the length of
173 the input string */
Serhiy Storchaka1f932612016-08-29 15:57:26 +0300174#define READ_spec(index) PyUnicode_READ(kind, data, index)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200175
176 Py_ssize_t consumed;
177 int align_specified = 0;
Eric V. Smith2ea97122014-04-14 11:55:10 -0400178 int fill_char_specified = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200179
Eric V. Smith2ea97122014-04-14 11:55:10 -0400180 format->fill_char = ' ';
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200181 format->align = default_align;
182 format->alternate = 0;
183 format->sign = '\0';
184 format->width = -1;
Benjamin Peterson995026a2016-09-13 22:46:15 -0700185 format->thousands_separators = LT_NO_LOCALE;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200186 format->precision = -1;
187 format->type = default_type;
188
189 /* If the second char is an alignment token,
190 then parse the fill char */
191 if (end-pos >= 2 && is_alignment_token(READ_spec(pos+1))) {
192 format->align = READ_spec(pos+1);
193 format->fill_char = READ_spec(pos);
Eric V. Smith2ea97122014-04-14 11:55:10 -0400194 fill_char_specified = 1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200195 align_specified = 1;
196 pos += 2;
197 }
198 else if (end-pos >= 1 && is_alignment_token(READ_spec(pos))) {
199 format->align = READ_spec(pos);
200 align_specified = 1;
201 ++pos;
202 }
203
204 /* Parse the various sign options */
205 if (end-pos >= 1 && is_sign_element(READ_spec(pos))) {
206 format->sign = READ_spec(pos);
207 ++pos;
208 }
209
210 /* If the next character is #, we're in alternate mode. This only
211 applies to integers. */
212 if (end-pos >= 1 && READ_spec(pos) == '#') {
213 format->alternate = 1;
214 ++pos;
215 }
216
217 /* The special case for 0-padding (backwards compat) */
Eric V. Smith2ea97122014-04-14 11:55:10 -0400218 if (!fill_char_specified && end-pos >= 1 && READ_spec(pos) == '0') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200219 format->fill_char = '0';
220 if (!align_specified) {
221 format->align = '=';
222 }
223 ++pos;
224 }
225
226 consumed = get_integer(format_spec, &pos, end, &format->width);
227 if (consumed == -1)
228 /* Overflow error. Exception already set. */
229 return 0;
230
231 /* If consumed is 0, we didn't consume any characters for the
232 width. In that case, reset the width to -1, because
233 get_integer() will have set it to zero. -1 is how we record
234 that the width wasn't specified. */
235 if (consumed == 0)
236 format->width = -1;
237
238 /* Comma signifies add thousands separators */
239 if (end-pos && READ_spec(pos) == ',') {
Eric V. Smith89e1b1a2016-09-09 23:06:47 -0400240 format->thousands_separators = LT_DEFAULT_LOCALE;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200241 ++pos;
242 }
Eric V. Smith89e1b1a2016-09-09 23:06:47 -0400243 /* Underscore signifies add thousands separators */
244 if (end-pos && READ_spec(pos) == '_') {
Benjamin Peterson995026a2016-09-13 22:46:15 -0700245 if (format->thousands_separators != LT_NO_LOCALE) {
Eric V. Smith89e1b1a2016-09-09 23:06:47 -0400246 invalid_comma_and_underscore();
247 return 0;
248 }
249 format->thousands_separators = LT_UNDERSCORE_LOCALE;
250 ++pos;
251 }
252 if (end-pos && READ_spec(pos) == ',') {
253 invalid_comma_and_underscore();
254 return 0;
255 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200256
257 /* Parse field precision */
258 if (end-pos && READ_spec(pos) == '.') {
259 ++pos;
260
261 consumed = get_integer(format_spec, &pos, end, &format->precision);
262 if (consumed == -1)
263 /* Overflow error. Exception already set. */
264 return 0;
265
266 /* Not having a precision after a dot is an error. */
267 if (consumed == 0) {
268 PyErr_Format(PyExc_ValueError,
269 "Format specifier missing precision");
270 return 0;
271 }
272
273 }
274
275 /* Finally, parse the type field. */
276
277 if (end-pos > 1) {
Eric V. Smithd25cfe62012-01-19 20:04:28 -0500278 /* More than one char remain, invalid format specifier. */
279 PyErr_Format(PyExc_ValueError, "Invalid format specifier");
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200280 return 0;
281 }
282
283 if (end-pos == 1) {
284 format->type = READ_spec(pos);
285 ++pos;
286 }
287
288 /* Do as much validating as we can, just by looking at the format
289 specifier. Do not take into account what type of formatting
290 we're doing (int, float, string). */
291
292 if (format->thousands_separators) {
293 switch (format->type) {
294 case 'd':
295 case 'e':
296 case 'f':
297 case 'g':
298 case 'E':
299 case 'G':
300 case '%':
301 case 'F':
302 case '\0':
303 /* These are allowed. See PEP 378.*/
304 break;
Eric V. Smith89e1b1a2016-09-09 23:06:47 -0400305 case 'b':
306 case 'o':
307 case 'x':
308 case 'X':
309 /* Underscores are allowed in bin/oct/hex. See PEP 515. */
310 if (format->thousands_separators == LT_UNDERSCORE_LOCALE) {
311 /* Every four digits, not every three, in bin/oct/hex. */
312 format->thousands_separators = LT_UNDER_FOUR_LOCALE;
313 break;
314 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200315 default:
316 invalid_comma_type(format->type);
317 return 0;
318 }
319 }
320
Victor Stinnera4ac6002012-01-21 15:50:49 +0100321 assert (format->align <= 127);
322 assert (format->sign <= 127);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200323 return 1;
324}
325
326/* Calculate the padding needed. */
327static void
328calc_padding(Py_ssize_t nchars, Py_ssize_t width, Py_UCS4 align,
329 Py_ssize_t *n_lpadding, Py_ssize_t *n_rpadding,
330 Py_ssize_t *n_total)
331{
332 if (width >= 0) {
333 if (nchars > width)
334 *n_total = nchars;
335 else
336 *n_total = width;
337 }
338 else {
339 /* not specified, use all of the chars and no more */
340 *n_total = nchars;
341 }
342
343 /* Figure out how much leading space we need, based on the
344 aligning */
345 if (align == '>')
346 *n_lpadding = *n_total - nchars;
347 else if (align == '^')
348 *n_lpadding = (*n_total - nchars) / 2;
349 else if (align == '<' || align == '=')
350 *n_lpadding = 0;
351 else {
352 /* We should never have an unspecified alignment. */
353 *n_lpadding = 0;
354 assert(0);
355 }
356
357 *n_rpadding = *n_total - nchars - *n_lpadding;
358}
359
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200360/* Do the padding, and return a pointer to where the caller-supplied
361 content goes. */
Victor Stinner9ce59bb2013-05-17 00:04:56 +0200362static int
Victor Stinnerd3f08822012-05-29 12:57:52 +0200363fill_padding(_PyUnicodeWriter *writer,
364 Py_ssize_t nchars,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200365 Py_UCS4 fill_char, Py_ssize_t n_lpadding,
366 Py_ssize_t n_rpadding)
367{
Victor Stinnerd3f08822012-05-29 12:57:52 +0200368 Py_ssize_t pos;
369
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200370 /* Pad on left. */
Victor Stinnerd3f08822012-05-29 12:57:52 +0200371 if (n_lpadding) {
372 pos = writer->pos;
373 _PyUnicode_FastFill(writer->buffer, pos, n_lpadding, fill_char);
374 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200375
376 /* Pad on right. */
Victor Stinnerd3f08822012-05-29 12:57:52 +0200377 if (n_rpadding) {
378 pos = writer->pos + nchars + n_lpadding;
379 _PyUnicode_FastFill(writer->buffer, pos, n_rpadding, fill_char);
380 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200381
382 /* Pointer to the user content. */
Victor Stinnerd3f08822012-05-29 12:57:52 +0200383 writer->pos += n_lpadding;
384 return 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200385}
386
387/************************************************************************/
388/*********** common routines for numeric formatting *********************/
389/************************************************************************/
390
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200391/* Locale info needed for formatting integers and the part of floats
392 before and including the decimal. Note that locales only support
393 8-bit chars, not unicode. */
394typedef struct {
Victor Stinner41a863c2012-02-24 00:37:51 +0100395 PyObject *decimal_point;
396 PyObject *thousands_sep;
397 const char *grouping;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200398} LocaleInfo;
399
Victor Stinner41a863c2012-02-24 00:37:51 +0100400#define STATIC_LOCALE_INFO_INIT {0, 0, 0}
401
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200402/* describes the layout for an integer, see the comment in
403 calc_number_widths() for details */
404typedef struct {
405 Py_ssize_t n_lpadding;
406 Py_ssize_t n_prefix;
407 Py_ssize_t n_spadding;
408 Py_ssize_t n_rpadding;
409 char sign;
410 Py_ssize_t n_sign; /* number of digits needed for sign (0/1) */
411 Py_ssize_t n_grouped_digits; /* Space taken up by the digits, including
412 any grouping chars. */
413 Py_ssize_t n_decimal; /* 0 if only an integer */
414 Py_ssize_t n_remainder; /* Digits in decimal and/or exponent part,
415 excluding the decimal itself, if
416 present. */
417
418 /* These 2 are not the widths of fields, but are needed by
419 STRINGLIB_GROUPING. */
420 Py_ssize_t n_digits; /* The number of digits before a decimal
421 or exponent. */
422 Py_ssize_t n_min_width; /* The min_width we used when we computed
423 the n_grouped_digits width. */
424} NumberFieldWidths;
425
426
427/* Given a number of the form:
428 digits[remainder]
429 where ptr points to the start and end points to the end, find where
430 the integer part ends. This could be a decimal, an exponent, both,
431 or neither.
432 If a decimal point is present, set *has_decimal and increment
433 remainder beyond it.
434 Results are undefined (but shouldn't crash) for improperly
435 formatted strings.
436*/
437static void
438parse_number(PyObject *s, Py_ssize_t pos, Py_ssize_t end,
439 Py_ssize_t *n_remainder, int *has_decimal)
440{
441 Py_ssize_t remainder;
Serhiy Storchaka1f932612016-08-29 15:57:26 +0300442 int kind = PyUnicode_KIND(s);
443 void *data = PyUnicode_DATA(s);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200444
Serhiy Storchaka1f932612016-08-29 15:57:26 +0300445 while (pos<end && Py_ISDIGIT(PyUnicode_READ(kind, data, pos)))
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200446 ++pos;
447 remainder = pos;
448
449 /* Does remainder start with a decimal point? */
Serhiy Storchaka1f932612016-08-29 15:57:26 +0300450 *has_decimal = pos<end && PyUnicode_READ(kind, data, remainder) == '.';
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200451
452 /* Skip the decimal point. */
453 if (*has_decimal)
454 remainder++;
455
456 *n_remainder = end - remainder;
457}
458
459/* not all fields of format are used. for example, precision is
460 unused. should this take discrete params in order to be more clear
461 about what it does? or is passing a single format parameter easier
462 and more efficient enough to justify a little obfuscation? */
463static Py_ssize_t
464calc_number_widths(NumberFieldWidths *spec, Py_ssize_t n_prefix,
465 Py_UCS4 sign_char, PyObject *number, Py_ssize_t n_start,
466 Py_ssize_t n_end, Py_ssize_t n_remainder,
467 int has_decimal, const LocaleInfo *locale,
Victor Stinner41a863c2012-02-24 00:37:51 +0100468 const InternalFormatSpec *format, Py_UCS4 *maxchar)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200469{
470 Py_ssize_t n_non_digit_non_padding;
471 Py_ssize_t n_padding;
472
473 spec->n_digits = n_end - n_start - n_remainder - (has_decimal?1:0);
474 spec->n_lpadding = 0;
475 spec->n_prefix = n_prefix;
Victor Stinner41a863c2012-02-24 00:37:51 +0100476 spec->n_decimal = has_decimal ? PyUnicode_GET_LENGTH(locale->decimal_point) : 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200477 spec->n_remainder = n_remainder;
478 spec->n_spadding = 0;
479 spec->n_rpadding = 0;
480 spec->sign = '\0';
481 spec->n_sign = 0;
482
483 /* the output will look like:
484 | |
485 | <lpadding> <sign> <prefix> <spadding> <grouped_digits> <decimal> <remainder> <rpadding> |
486 | |
487
488 sign is computed from format->sign and the actual
489 sign of the number
490
491 prefix is given (it's for the '0x' prefix)
492
493 digits is already known
494
495 the total width is either given, or computed from the
496 actual digits
497
498 only one of lpadding, spadding, and rpadding can be non-zero,
499 and it's calculated from the width and other fields
500 */
501
502 /* compute the various parts we're going to write */
503 switch (format->sign) {
504 case '+':
505 /* always put a + or - */
506 spec->n_sign = 1;
507 spec->sign = (sign_char == '-' ? '-' : '+');
508 break;
509 case ' ':
510 spec->n_sign = 1;
511 spec->sign = (sign_char == '-' ? '-' : ' ');
512 break;
513 default:
514 /* Not specified, or the default (-) */
515 if (sign_char == '-') {
516 spec->n_sign = 1;
517 spec->sign = '-';
518 }
519 }
520
521 /* The number of chars used for non-digits and non-padding. */
522 n_non_digit_non_padding = spec->n_sign + spec->n_prefix + spec->n_decimal +
523 spec->n_remainder;
524
525 /* min_width can go negative, that's okay. format->width == -1 means
526 we don't care. */
527 if (format->fill_char == '0' && format->align == '=')
528 spec->n_min_width = format->width - n_non_digit_non_padding;
529 else
530 spec->n_min_width = 0;
531
532 if (spec->n_digits == 0)
533 /* This case only occurs when using 'c' formatting, we need
534 to special case it because the grouping code always wants
535 to have at least one character. */
536 spec->n_grouped_digits = 0;
Victor Stinner41a863c2012-02-24 00:37:51 +0100537 else {
538 Py_UCS4 grouping_maxchar;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200539 spec->n_grouped_digits = _PyUnicode_InsertThousandsGrouping(
Victor Stinner41a863c2012-02-24 00:37:51 +0100540 NULL, 0,
541 0, NULL,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200542 spec->n_digits, spec->n_min_width,
Victor Stinner41a863c2012-02-24 00:37:51 +0100543 locale->grouping, locale->thousands_sep, &grouping_maxchar);
544 *maxchar = Py_MAX(*maxchar, grouping_maxchar);
545 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200546
547 /* Given the desired width and the total of digit and non-digit
548 space we consume, see if we need any padding. format->width can
549 be negative (meaning no padding), but this code still works in
550 that case. */
551 n_padding = format->width -
552 (n_non_digit_non_padding + spec->n_grouped_digits);
553 if (n_padding > 0) {
554 /* Some padding is needed. Determine if it's left, space, or right. */
555 switch (format->align) {
556 case '<':
557 spec->n_rpadding = n_padding;
558 break;
559 case '^':
560 spec->n_lpadding = n_padding / 2;
561 spec->n_rpadding = n_padding - spec->n_lpadding;
562 break;
563 case '=':
564 spec->n_spadding = n_padding;
565 break;
566 case '>':
567 spec->n_lpadding = n_padding;
568 break;
569 default:
570 /* Shouldn't get here, but treat it as '>' */
571 spec->n_lpadding = n_padding;
572 assert(0);
573 break;
574 }
575 }
Victor Stinner41a863c2012-02-24 00:37:51 +0100576
577 if (spec->n_lpadding || spec->n_spadding || spec->n_rpadding)
578 *maxchar = Py_MAX(*maxchar, format->fill_char);
579
Victor Stinner90f50d42012-02-24 01:44:47 +0100580 if (spec->n_decimal)
581 *maxchar = Py_MAX(*maxchar, PyUnicode_MAX_CHAR_VALUE(locale->decimal_point));
582
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200583 return spec->n_lpadding + spec->n_sign + spec->n_prefix +
584 spec->n_spadding + spec->n_grouped_digits + spec->n_decimal +
585 spec->n_remainder + spec->n_rpadding;
586}
587
588/* Fill in the digit parts of a numbers's string representation,
589 as determined in calc_number_widths().
Victor Stinnerafbaa202011-09-28 21:50:16 +0200590 Return -1 on error, or 0 on success. */
591static int
Victor Stinnerd3f08822012-05-29 12:57:52 +0200592fill_number(_PyUnicodeWriter *writer, const NumberFieldWidths *spec,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200593 PyObject *digits, Py_ssize_t d_start, Py_ssize_t d_end,
Victor Stinnerafbaa202011-09-28 21:50:16 +0200594 PyObject *prefix, Py_ssize_t p_start,
595 Py_UCS4 fill_char,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200596 LocaleInfo *locale, int toupper)
597{
598 /* Used to keep track of digits, decimal, and remainder. */
599 Py_ssize_t d_pos = d_start;
Victor Stinner22c103b2013-05-07 23:50:03 +0200600 const unsigned int kind = writer->kind;
Victor Stinnerd3f08822012-05-29 12:57:52 +0200601 const void *data = writer->data;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200602 Py_ssize_t r;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200603
604 if (spec->n_lpadding) {
Victor Stinnerd3f08822012-05-29 12:57:52 +0200605 _PyUnicode_FastFill(writer->buffer,
606 writer->pos, spec->n_lpadding, fill_char);
607 writer->pos += spec->n_lpadding;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200608 }
609 if (spec->n_sign == 1) {
Victor Stinnerd3f08822012-05-29 12:57:52 +0200610 PyUnicode_WRITE(kind, data, writer->pos, spec->sign);
611 writer->pos++;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200612 }
613 if (spec->n_prefix) {
Victor Stinnerd3f08822012-05-29 12:57:52 +0200614 _PyUnicode_FastCopyCharacters(writer->buffer, writer->pos,
615 prefix, p_start,
616 spec->n_prefix);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200617 if (toupper) {
618 Py_ssize_t t;
Benjamin Peterson21e0da22012-01-11 21:00:42 -0500619 for (t = 0; t < spec->n_prefix; t++) {
Victor Stinnerd3f08822012-05-29 12:57:52 +0200620 Py_UCS4 c = PyUnicode_READ(kind, data, writer->pos + t);
Victor Stinnered277852012-02-01 00:22:23 +0100621 c = Py_TOUPPER(c);
Victor Stinnera4ac6002012-01-21 15:50:49 +0100622 assert (c <= 127);
Victor Stinnerd3f08822012-05-29 12:57:52 +0200623 PyUnicode_WRITE(kind, data, writer->pos + t, c);
Benjamin Peterson21e0da22012-01-11 21:00:42 -0500624 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200625 }
Victor Stinnerd3f08822012-05-29 12:57:52 +0200626 writer->pos += spec->n_prefix;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200627 }
628 if (spec->n_spadding) {
Victor Stinnerd3f08822012-05-29 12:57:52 +0200629 _PyUnicode_FastFill(writer->buffer,
630 writer->pos, spec->n_spadding, fill_char);
631 writer->pos += spec->n_spadding;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200632 }
633
634 /* Only for type 'c' special case, it has no digits. */
635 if (spec->n_digits != 0) {
636 /* Fill the digits with InsertThousandsGrouping. */
Victor Stinnerdba2dee2011-09-28 21:50:42 +0200637 char *pdigits;
638 if (PyUnicode_READY(digits))
639 return -1;
640 pdigits = PyUnicode_DATA(digits);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200641 if (PyUnicode_KIND(digits) < kind) {
642 pdigits = _PyUnicode_AsKind(digits, kind);
Victor Stinnerafbaa202011-09-28 21:50:16 +0200643 if (pdigits == NULL)
644 return -1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200645 }
Victor Stinner90f50d42012-02-24 01:44:47 +0100646 r = _PyUnicode_InsertThousandsGrouping(
Victor Stinnerd3f08822012-05-29 12:57:52 +0200647 writer->buffer, writer->pos,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200648 spec->n_grouped_digits,
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200649 pdigits + kind * d_pos,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200650 spec->n_digits, spec->n_min_width,
Victor Stinner41a863c2012-02-24 00:37:51 +0100651 locale->grouping, locale->thousands_sep, NULL);
Victor Stinner90f50d42012-02-24 01:44:47 +0100652 if (r == -1)
653 return -1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200654 assert(r == spec->n_grouped_digits);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200655 if (PyUnicode_KIND(digits) < kind)
656 PyMem_Free(pdigits);
657 d_pos += spec->n_digits;
658 }
659 if (toupper) {
660 Py_ssize_t t;
Benjamin Peterson21e0da22012-01-11 21:00:42 -0500661 for (t = 0; t < spec->n_grouped_digits; t++) {
Victor Stinnerd3f08822012-05-29 12:57:52 +0200662 Py_UCS4 c = PyUnicode_READ(kind, data, writer->pos + t);
Victor Stinnered277852012-02-01 00:22:23 +0100663 c = Py_TOUPPER(c);
Benjamin Peterson21e0da22012-01-11 21:00:42 -0500664 if (c > 127) {
665 PyErr_SetString(PyExc_SystemError, "non-ascii grouped digit");
666 return -1;
667 }
Victor Stinnerd3f08822012-05-29 12:57:52 +0200668 PyUnicode_WRITE(kind, data, writer->pos + t, c);
Benjamin Peterson21e0da22012-01-11 21:00:42 -0500669 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200670 }
Victor Stinnerd3f08822012-05-29 12:57:52 +0200671 writer->pos += spec->n_grouped_digits;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200672
673 if (spec->n_decimal) {
Victor Stinnerd3f08822012-05-29 12:57:52 +0200674 _PyUnicode_FastCopyCharacters(
675 writer->buffer, writer->pos,
676 locale->decimal_point, 0, spec->n_decimal);
677 writer->pos += spec->n_decimal;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200678 d_pos += 1;
679 }
680
681 if (spec->n_remainder) {
Victor Stinnerd3f08822012-05-29 12:57:52 +0200682 _PyUnicode_FastCopyCharacters(
683 writer->buffer, writer->pos,
684 digits, d_pos, spec->n_remainder);
685 writer->pos += spec->n_remainder;
Brett Cannon8a250fa2012-06-25 16:13:44 -0400686 /* d_pos += spec->n_remainder; */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200687 }
688
689 if (spec->n_rpadding) {
Victor Stinnerd3f08822012-05-29 12:57:52 +0200690 _PyUnicode_FastFill(writer->buffer,
691 writer->pos, spec->n_rpadding,
692 fill_char);
693 writer->pos += spec->n_rpadding;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200694 }
Victor Stinnerafbaa202011-09-28 21:50:16 +0200695 return 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200696}
697
Serhiy Storchaka2d06e842015-12-25 19:53:18 +0200698static const char no_grouping[1] = {CHAR_MAX};
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200699
700/* Find the decimal point character(s?), thousands_separator(s?), and
701 grouping description, either for the current locale if type is
Eric V. Smith89e1b1a2016-09-09 23:06:47 -0400702 LT_CURRENT_LOCALE, a hard-coded locale if LT_DEFAULT_LOCALE or
703 LT_UNDERSCORE_LOCALE/LT_UNDER_FOUR_LOCALE, or none if LT_NO_LOCALE. */
Victor Stinner41a863c2012-02-24 00:37:51 +0100704static int
Benjamin Peterson59e5e0d2016-09-13 22:43:45 -0700705get_locale_info(enum LocaleType type, LocaleInfo *locale_info)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200706{
707 switch (type) {
708 case LT_CURRENT_LOCALE: {
709 struct lconv *locale_data = localeconv();
Victor Stinner41a863c2012-02-24 00:37:51 +0100710 locale_info->decimal_point = PyUnicode_DecodeLocale(
711 locale_data->decimal_point,
712 NULL);
713 if (locale_info->decimal_point == NULL)
714 return -1;
715 locale_info->thousands_sep = PyUnicode_DecodeLocale(
716 locale_data->thousands_sep,
717 NULL);
Benjamin Peterson59e5e0d2016-09-13 22:43:45 -0700718 if (locale_info->thousands_sep == NULL)
Victor Stinner41a863c2012-02-24 00:37:51 +0100719 return -1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200720 locale_info->grouping = locale_data->grouping;
721 break;
722 }
723 case LT_DEFAULT_LOCALE:
Eric V. Smith89e1b1a2016-09-09 23:06:47 -0400724 case LT_UNDERSCORE_LOCALE:
725 case LT_UNDER_FOUR_LOCALE:
Victor Stinner41a863c2012-02-24 00:37:51 +0100726 locale_info->decimal_point = PyUnicode_FromOrdinal('.');
Eric V. Smith89e1b1a2016-09-09 23:06:47 -0400727 locale_info->thousands_sep = PyUnicode_FromOrdinal(
728 type == LT_DEFAULT_LOCALE ? ',' : '_');
Benjamin Peterson59e5e0d2016-09-13 22:43:45 -0700729 if (!locale_info->decimal_point || !locale_info->thousands_sep)
Victor Stinner41a863c2012-02-24 00:37:51 +0100730 return -1;
Eric V. Smith89e1b1a2016-09-09 23:06:47 -0400731 if (type != LT_UNDER_FOUR_LOCALE)
732 locale_info->grouping = "\3"; /* Group every 3 characters. The
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200733 (implicit) trailing 0 means repeat
734 infinitely. */
Eric V. Smith89e1b1a2016-09-09 23:06:47 -0400735 else
736 locale_info->grouping = "\4"; /* Bin/oct/hex group every four. */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200737 break;
738 case LT_NO_LOCALE:
Victor Stinner41a863c2012-02-24 00:37:51 +0100739 locale_info->decimal_point = PyUnicode_FromOrdinal('.');
740 locale_info->thousands_sep = PyUnicode_New(0, 0);
Benjamin Peterson59e5e0d2016-09-13 22:43:45 -0700741 if (!locale_info->decimal_point || !locale_info->thousands_sep)
Victor Stinner41a863c2012-02-24 00:37:51 +0100742 return -1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200743 locale_info->grouping = no_grouping;
744 break;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200745 }
Victor Stinner41a863c2012-02-24 00:37:51 +0100746 return 0;
747}
748
749static void
750free_locale_info(LocaleInfo *locale_info)
751{
752 Py_XDECREF(locale_info->decimal_point);
753 Py_XDECREF(locale_info->thousands_sep);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200754}
755
756/************************************************************************/
757/*********** string formatting ******************************************/
758/************************************************************************/
759
Victor Stinnerd3f08822012-05-29 12:57:52 +0200760static int
761format_string_internal(PyObject *value, const InternalFormatSpec *format,
762 _PyUnicodeWriter *writer)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200763{
764 Py_ssize_t lpad;
765 Py_ssize_t rpad;
766 Py_ssize_t total;
Victor Stinnerd3f08822012-05-29 12:57:52 +0200767 Py_ssize_t len;
768 int result = -1;
Victor Stinnerece58de2012-04-23 23:36:38 +0200769 Py_UCS4 maxchar;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200770
Victor Stinnerd3f08822012-05-29 12:57:52 +0200771 assert(PyUnicode_IS_READY(value));
772 len = PyUnicode_GET_LENGTH(value);
773
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200774 /* sign is not allowed on strings */
775 if (format->sign != '\0') {
776 PyErr_SetString(PyExc_ValueError,
777 "Sign not allowed in string format specifier");
778 goto done;
779 }
780
781 /* alternate is not allowed on strings */
782 if (format->alternate) {
783 PyErr_SetString(PyExc_ValueError,
784 "Alternate form (#) not allowed in string format "
785 "specifier");
786 goto done;
787 }
788
789 /* '=' alignment not allowed on strings */
790 if (format->align == '=') {
791 PyErr_SetString(PyExc_ValueError,
792 "'=' alignment not allowed "
793 "in string format specifier");
794 goto done;
795 }
796
Victor Stinner621ef3d2012-10-02 00:33:47 +0200797 if ((format->width == -1 || format->width <= len)
798 && (format->precision == -1 || format->precision >= len)) {
Victor Stinnerd3f08822012-05-29 12:57:52 +0200799 /* Fast path */
800 return _PyUnicodeWriter_WriteStr(writer, value);
801 }
802
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200803 /* if precision is specified, output no more that format.precision
804 characters */
805 if (format->precision >= 0 && len >= format->precision) {
806 len = format->precision;
807 }
808
809 calc_padding(len, format->width, format->align, &lpad, &rpad, &total);
810
Victor Stinnereb4b5ac2013-04-03 02:02:33 +0200811 maxchar = writer->maxchar;
Victor Stinnera4ac6002012-01-21 15:50:49 +0100812 if (lpad != 0 || rpad != 0)
813 maxchar = Py_MAX(maxchar, format->fill_char);
Victor Stinnereb4b5ac2013-04-03 02:02:33 +0200814 if (PyUnicode_MAX_CHAR_VALUE(value) > maxchar) {
815 Py_UCS4 valmaxchar = _PyUnicode_FindMaxChar(value, 0, len);
816 maxchar = Py_MAX(maxchar, valmaxchar);
817 }
Victor Stinnera4ac6002012-01-21 15:50:49 +0100818
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200819 /* allocate the resulting string */
Victor Stinnerd3f08822012-05-29 12:57:52 +0200820 if (_PyUnicodeWriter_Prepare(writer, total, maxchar) == -1)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200821 goto done;
822
823 /* Write into that space. First the padding. */
Eric V. Smith2ea97122014-04-14 11:55:10 -0400824 result = fill_padding(writer, len, format->fill_char, lpad, rpad);
Victor Stinnerd3f08822012-05-29 12:57:52 +0200825 if (result == -1)
826 goto done;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200827
828 /* Then the source string. */
Victor Stinnerc9d369f2012-06-16 02:22:37 +0200829 if (len) {
830 _PyUnicode_FastCopyCharacters(writer->buffer, writer->pos,
831 value, 0, len);
832 }
Victor Stinnerd3f08822012-05-29 12:57:52 +0200833 writer->pos += (len + rpad);
834 result = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200835
836done:
837 return result;
838}
839
840
841/************************************************************************/
842/*********** long formatting ********************************************/
843/************************************************************************/
844
Victor Stinnerd3f08822012-05-29 12:57:52 +0200845static int
846format_long_internal(PyObject *value, const InternalFormatSpec *format,
847 _PyUnicodeWriter *writer)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200848{
Victor Stinnerd3f08822012-05-29 12:57:52 +0200849 int result = -1;
Amaury Forgeot d'Arccd27df32012-01-23 22:42:19 +0100850 Py_UCS4 maxchar = 127;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200851 PyObject *tmp = NULL;
852 Py_ssize_t inumeric_chars;
853 Py_UCS4 sign_char = '\0';
854 Py_ssize_t n_digits; /* count of digits need from the computed
855 string */
856 Py_ssize_t n_remainder = 0; /* Used only for 'c' formatting, which
857 produces non-digits */
858 Py_ssize_t n_prefix = 0; /* Count of prefix chars, (e.g., '0x') */
859 Py_ssize_t n_total;
Victor Stinnered277852012-02-01 00:22:23 +0100860 Py_ssize_t prefix = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200861 NumberFieldWidths spec;
862 long x;
863
864 /* Locale settings, either from the actual locale or
865 from a hard-code pseudo-locale */
Victor Stinner41a863c2012-02-24 00:37:51 +0100866 LocaleInfo locale = STATIC_LOCALE_INFO_INIT;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200867
868 /* no precision allowed on integers */
869 if (format->precision != -1) {
870 PyErr_SetString(PyExc_ValueError,
871 "Precision not allowed in integer format specifier");
872 goto done;
873 }
874
875 /* special case for character formatting */
876 if (format->type == 'c') {
877 /* error to specify a sign */
878 if (format->sign != '\0') {
879 PyErr_SetString(PyExc_ValueError,
880 "Sign not allowed with integer"
881 " format specifier 'c'");
882 goto done;
883 }
Eric V. Smitha12572f2014-04-15 22:37:55 -0400884 /* error to request alternate format */
885 if (format->alternate) {
886 PyErr_SetString(PyExc_ValueError,
887 "Alternate form (#) not allowed with integer"
888 " format specifier 'c'");
889 goto done;
890 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200891
892 /* taken from unicodeobject.c formatchar() */
893 /* Integer input truncated to a character */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200894 x = PyLong_AsLong(value);
895 if (x == -1 && PyErr_Occurred())
896 goto done;
897 if (x < 0 || x > 0x10ffff) {
898 PyErr_SetString(PyExc_OverflowError,
Victor Stinnera4ac6002012-01-21 15:50:49 +0100899 "%c arg not in range(0x110000)");
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200900 goto done;
901 }
902 tmp = PyUnicode_FromOrdinal(x);
903 inumeric_chars = 0;
904 n_digits = 1;
Amaury Forgeot d'Arc6d766fc2012-01-23 23:20:43 +0100905 maxchar = Py_MAX(maxchar, (Py_UCS4)x);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200906
907 /* As a sort-of hack, we tell calc_number_widths that we only
908 have "remainder" characters. calc_number_widths thinks
909 these are characters that don't get formatted, only copied
910 into the output string. We do this for 'c' formatting,
911 because the characters are likely to be non-digits. */
912 n_remainder = 1;
913 }
914 else {
915 int base;
916 int leading_chars_to_skip = 0; /* Number of characters added by
917 PyNumber_ToBase that we want to
918 skip over. */
919
920 /* Compute the base and how many characters will be added by
921 PyNumber_ToBase */
922 switch (format->type) {
923 case 'b':
924 base = 2;
925 leading_chars_to_skip = 2; /* 0b */
926 break;
927 case 'o':
928 base = 8;
929 leading_chars_to_skip = 2; /* 0o */
930 break;
931 case 'x':
932 case 'X':
933 base = 16;
934 leading_chars_to_skip = 2; /* 0x */
935 break;
936 default: /* shouldn't be needed, but stops a compiler warning */
937 case 'd':
938 case 'n':
939 base = 10;
940 break;
941 }
942
Victor Stinnerd3f08822012-05-29 12:57:52 +0200943 if (format->sign != '+' && format->sign != ' '
944 && format->width == -1
945 && format->type != 'X' && format->type != 'n'
946 && !format->thousands_separators
947 && PyLong_CheckExact(value))
948 {
949 /* Fast path */
950 return _PyLong_FormatWriter(writer, value, base, format->alternate);
951 }
952
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200953 /* The number of prefix chars is the same as the leading
954 chars to skip */
955 if (format->alternate)
956 n_prefix = leading_chars_to_skip;
957
958 /* Do the hard part, converting to a string in a given base */
Victor Stinnerd3f08822012-05-29 12:57:52 +0200959 tmp = _PyLong_Format(value, base);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200960 if (tmp == NULL || PyUnicode_READY(tmp) == -1)
961 goto done;
962
963 inumeric_chars = 0;
964 n_digits = PyUnicode_GET_LENGTH(tmp);
965
966 prefix = inumeric_chars;
967
968 /* Is a sign character present in the output? If so, remember it
969 and skip it */
970 if (PyUnicode_READ_CHAR(tmp, inumeric_chars) == '-') {
971 sign_char = '-';
972 ++prefix;
973 ++leading_chars_to_skip;
974 }
975
976 /* Skip over the leading chars (0x, 0b, etc.) */
977 n_digits -= leading_chars_to_skip;
978 inumeric_chars += leading_chars_to_skip;
979 }
980
981 /* Determine the grouping, separator, and decimal point, if any. */
Victor Stinner41a863c2012-02-24 00:37:51 +0100982 if (get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
Eric V. Smith89e1b1a2016-09-09 23:06:47 -0400983 format->thousands_separators,
Victor Stinner41a863c2012-02-24 00:37:51 +0100984 &locale) == -1)
985 goto done;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200986
987 /* Calculate how much memory we'll need. */
988 n_total = calc_number_widths(&spec, n_prefix, sign_char, tmp, inumeric_chars,
Victor Stinner41a863c2012-02-24 00:37:51 +0100989 inumeric_chars + n_digits, n_remainder, 0,
990 &locale, format, &maxchar);
Victor Stinnera4ac6002012-01-21 15:50:49 +0100991
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200992 /* Allocate the memory. */
Victor Stinnerd3f08822012-05-29 12:57:52 +0200993 if (_PyUnicodeWriter_Prepare(writer, n_total, maxchar) == -1)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200994 goto done;
995
996 /* Populate the memory. */
Victor Stinnerd3f08822012-05-29 12:57:52 +0200997 result = fill_number(writer, &spec,
998 tmp, inumeric_chars, inumeric_chars + n_digits,
Eric V. Smith2ea97122014-04-14 11:55:10 -0400999 tmp, prefix, format->fill_char,
Victor Stinnerd3f08822012-05-29 12:57:52 +02001000 &locale, format->type == 'X');
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001001
1002done:
1003 Py_XDECREF(tmp);
Victor Stinner41a863c2012-02-24 00:37:51 +01001004 free_locale_info(&locale);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001005 return result;
1006}
1007
1008/************************************************************************/
1009/*********** float formatting *******************************************/
1010/************************************************************************/
1011
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001012/* much of this is taken from unicodeobject.c */
Victor Stinnerd3f08822012-05-29 12:57:52 +02001013static int
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001014format_float_internal(PyObject *value,
Victor Stinnerd3f08822012-05-29 12:57:52 +02001015 const InternalFormatSpec *format,
1016 _PyUnicodeWriter *writer)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001017{
1018 char *buf = NULL; /* buffer returned from PyOS_double_to_string */
1019 Py_ssize_t n_digits;
1020 Py_ssize_t n_remainder;
1021 Py_ssize_t n_total;
1022 int has_decimal;
1023 double val;
Victor Stinner76d38502013-06-24 23:34:15 +02001024 int precision, default_precision = 6;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001025 Py_UCS4 type = format->type;
1026 int add_pct = 0;
1027 Py_ssize_t index;
1028 NumberFieldWidths spec;
1029 int flags = 0;
Victor Stinnerd3f08822012-05-29 12:57:52 +02001030 int result = -1;
Amaury Forgeot d'Arccd27df32012-01-23 22:42:19 +01001031 Py_UCS4 maxchar = 127;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001032 Py_UCS4 sign_char = '\0';
1033 int float_type; /* Used to see if we have a nan, inf, or regular float. */
1034 PyObject *unicode_tmp = NULL;
1035
1036 /* Locale settings, either from the actual locale or
1037 from a hard-code pseudo-locale */
Victor Stinner41a863c2012-02-24 00:37:51 +01001038 LocaleInfo locale = STATIC_LOCALE_INFO_INIT;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001039
Victor Stinner2f084ec2013-06-23 14:54:30 +02001040 if (format->precision > INT_MAX) {
1041 PyErr_SetString(PyExc_ValueError, "precision too big");
1042 goto done;
1043 }
1044 precision = (int)format->precision;
1045
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001046 if (format->alternate)
1047 flags |= Py_DTSF_ALT;
1048
1049 if (type == '\0') {
1050 /* Omitted type specifier. Behaves in the same way as repr(x)
1051 and str(x) if no precision is given, else like 'g', but with
1052 at least one digit after the decimal point. */
1053 flags |= Py_DTSF_ADD_DOT_0;
1054 type = 'r';
1055 default_precision = 0;
1056 }
1057
1058 if (type == 'n')
1059 /* 'n' is the same as 'g', except for the locale used to
1060 format the result. We take care of that later. */
1061 type = 'g';
1062
1063 val = PyFloat_AsDouble(value);
1064 if (val == -1.0 && PyErr_Occurred())
1065 goto done;
1066
1067 if (type == '%') {
1068 type = 'f';
1069 val *= 100;
1070 add_pct = 1;
1071 }
1072
1073 if (precision < 0)
1074 precision = default_precision;
1075 else if (type == 'r')
1076 type = 'g';
1077
Martin Panter4c359642016-05-08 13:53:41 +00001078 /* Cast "type", because if we're in unicode we need to pass an
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001079 8-bit char. This is safe, because we've restricted what "type"
1080 can be. */
1081 buf = PyOS_double_to_string(val, (char)type, precision, flags,
1082 &float_type);
1083 if (buf == NULL)
1084 goto done;
1085 n_digits = strlen(buf);
1086
1087 if (add_pct) {
1088 /* We know that buf has a trailing zero (since we just called
1089 strlen() on it), and we don't use that fact any more. So we
1090 can just write over the trailing zero. */
1091 buf[n_digits] = '%';
1092 n_digits += 1;
1093 }
1094
Victor Stinnerd3f08822012-05-29 12:57:52 +02001095 if (format->sign != '+' && format->sign != ' '
1096 && format->width == -1
1097 && format->type != 'n'
1098 && !format->thousands_separators)
1099 {
1100 /* Fast path */
Victor Stinner4a587072013-11-19 12:54:53 +01001101 result = _PyUnicodeWriter_WriteASCIIString(writer, buf, n_digits);
1102 PyMem_Free(buf);
Victor Stinnerd3f08822012-05-29 12:57:52 +02001103 return result;
1104 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001105
Victor Stinner4a587072013-11-19 12:54:53 +01001106 /* Since there is no unicode version of PyOS_double_to_string,
1107 just use the 8 bit version and then convert to unicode. */
1108 unicode_tmp = _PyUnicode_FromASCII(buf, n_digits);
1109 PyMem_Free(buf);
1110 if (unicode_tmp == NULL)
1111 goto done;
1112
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001113 /* Is a sign character present in the output? If so, remember it
1114 and skip it */
Victor Stinnerd3f08822012-05-29 12:57:52 +02001115 index = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001116 if (PyUnicode_READ_CHAR(unicode_tmp, index) == '-') {
1117 sign_char = '-';
1118 ++index;
1119 --n_digits;
1120 }
1121
1122 /* Determine if we have any "remainder" (after the digits, might include
1123 decimal or exponent or both (or neither)) */
1124 parse_number(unicode_tmp, index, index + n_digits, &n_remainder, &has_decimal);
1125
1126 /* Determine the grouping, separator, and decimal point, if any. */
Victor Stinner41a863c2012-02-24 00:37:51 +01001127 if (get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
Eric V. Smith89e1b1a2016-09-09 23:06:47 -04001128 format->thousands_separators,
Victor Stinner41a863c2012-02-24 00:37:51 +01001129 &locale) == -1)
1130 goto done;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001131
1132 /* Calculate how much memory we'll need. */
Victor Stinnerafbaa202011-09-28 21:50:16 +02001133 n_total = calc_number_widths(&spec, 0, sign_char, unicode_tmp, index,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001134 index + n_digits, n_remainder, has_decimal,
Victor Stinner41a863c2012-02-24 00:37:51 +01001135 &locale, format, &maxchar);
Victor Stinnera4ac6002012-01-21 15:50:49 +01001136
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001137 /* Allocate the memory. */
Victor Stinnerd3f08822012-05-29 12:57:52 +02001138 if (_PyUnicodeWriter_Prepare(writer, n_total, maxchar) == -1)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001139 goto done;
1140
1141 /* Populate the memory. */
Victor Stinnerd3f08822012-05-29 12:57:52 +02001142 result = fill_number(writer, &spec,
1143 unicode_tmp, index, index + n_digits,
Eric V. Smith2ea97122014-04-14 11:55:10 -04001144 NULL, 0, format->fill_char,
Victor Stinnerd3f08822012-05-29 12:57:52 +02001145 &locale, 0);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001146
1147done:
Stefan Krahd9c1bf72012-09-06 13:02:46 +02001148 Py_XDECREF(unicode_tmp);
Victor Stinner41a863c2012-02-24 00:37:51 +01001149 free_locale_info(&locale);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001150 return result;
1151}
1152
1153/************************************************************************/
1154/*********** complex formatting *****************************************/
1155/************************************************************************/
1156
Victor Stinnerd3f08822012-05-29 12:57:52 +02001157static int
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001158format_complex_internal(PyObject *value,
Victor Stinnerd3f08822012-05-29 12:57:52 +02001159 const InternalFormatSpec *format,
1160 _PyUnicodeWriter *writer)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001161{
1162 double re;
1163 double im;
1164 char *re_buf = NULL; /* buffer returned from PyOS_double_to_string */
1165 char *im_buf = NULL; /* buffer returned from PyOS_double_to_string */
1166
1167 InternalFormatSpec tmp_format = *format;
1168 Py_ssize_t n_re_digits;
1169 Py_ssize_t n_im_digits;
1170 Py_ssize_t n_re_remainder;
1171 Py_ssize_t n_im_remainder;
1172 Py_ssize_t n_re_total;
1173 Py_ssize_t n_im_total;
1174 int re_has_decimal;
1175 int im_has_decimal;
Victor Stinner76d38502013-06-24 23:34:15 +02001176 int precision, default_precision = 6;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001177 Py_UCS4 type = format->type;
1178 Py_ssize_t i_re;
1179 Py_ssize_t i_im;
1180 NumberFieldWidths re_spec;
1181 NumberFieldWidths im_spec;
1182 int flags = 0;
Victor Stinnerd3f08822012-05-29 12:57:52 +02001183 int result = -1;
Amaury Forgeot d'Arccd27df32012-01-23 22:42:19 +01001184 Py_UCS4 maxchar = 127;
Victor Stinnerd3f08822012-05-29 12:57:52 +02001185 enum PyUnicode_Kind rkind;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001186 void *rdata;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001187 Py_UCS4 re_sign_char = '\0';
1188 Py_UCS4 im_sign_char = '\0';
1189 int re_float_type; /* Used to see if we have a nan, inf, or regular float. */
1190 int im_float_type;
1191 int add_parens = 0;
1192 int skip_re = 0;
1193 Py_ssize_t lpad;
1194 Py_ssize_t rpad;
1195 Py_ssize_t total;
1196 PyObject *re_unicode_tmp = NULL;
1197 PyObject *im_unicode_tmp = NULL;
1198
1199 /* Locale settings, either from the actual locale or
1200 from a hard-code pseudo-locale */
Victor Stinner41a863c2012-02-24 00:37:51 +01001201 LocaleInfo locale = STATIC_LOCALE_INFO_INIT;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001202
Victor Stinner2f084ec2013-06-23 14:54:30 +02001203 if (format->precision > INT_MAX) {
1204 PyErr_SetString(PyExc_ValueError, "precision too big");
1205 goto done;
1206 }
1207 precision = (int)format->precision;
1208
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001209 /* Zero padding is not allowed. */
1210 if (format->fill_char == '0') {
1211 PyErr_SetString(PyExc_ValueError,
1212 "Zero padding is not allowed in complex format "
1213 "specifier");
1214 goto done;
1215 }
1216
1217 /* Neither is '=' alignment . */
1218 if (format->align == '=') {
1219 PyErr_SetString(PyExc_ValueError,
1220 "'=' alignment flag is not allowed in complex format "
1221 "specifier");
1222 goto done;
1223 }
1224
1225 re = PyComplex_RealAsDouble(value);
1226 if (re == -1.0 && PyErr_Occurred())
1227 goto done;
1228 im = PyComplex_ImagAsDouble(value);
1229 if (im == -1.0 && PyErr_Occurred())
1230 goto done;
1231
1232 if (format->alternate)
1233 flags |= Py_DTSF_ALT;
1234
1235 if (type == '\0') {
1236 /* Omitted type specifier. Should be like str(self). */
1237 type = 'r';
1238 default_precision = 0;
1239 if (re == 0.0 && copysign(1.0, re) == 1.0)
1240 skip_re = 1;
1241 else
1242 add_parens = 1;
1243 }
1244
1245 if (type == 'n')
1246 /* 'n' is the same as 'g', except for the locale used to
1247 format the result. We take care of that later. */
1248 type = 'g';
1249
1250 if (precision < 0)
1251 precision = default_precision;
1252 else if (type == 'r')
1253 type = 'g';
1254
Martin Panter4c359642016-05-08 13:53:41 +00001255 /* Cast "type", because if we're in unicode we need to pass an
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001256 8-bit char. This is safe, because we've restricted what "type"
1257 can be. */
1258 re_buf = PyOS_double_to_string(re, (char)type, precision, flags,
1259 &re_float_type);
1260 if (re_buf == NULL)
1261 goto done;
1262 im_buf = PyOS_double_to_string(im, (char)type, precision, flags,
1263 &im_float_type);
1264 if (im_buf == NULL)
1265 goto done;
1266
1267 n_re_digits = strlen(re_buf);
1268 n_im_digits = strlen(im_buf);
1269
1270 /* Since there is no unicode version of PyOS_double_to_string,
1271 just use the 8 bit version and then convert to unicode. */
Victor Stinnerd3f08822012-05-29 12:57:52 +02001272 re_unicode_tmp = _PyUnicode_FromASCII(re_buf, n_re_digits);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001273 if (re_unicode_tmp == NULL)
1274 goto done;
1275 i_re = 0;
1276
Victor Stinnerd3f08822012-05-29 12:57:52 +02001277 im_unicode_tmp = _PyUnicode_FromASCII(im_buf, n_im_digits);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001278 if (im_unicode_tmp == NULL)
1279 goto done;
1280 i_im = 0;
1281
1282 /* Is a sign character present in the output? If so, remember it
1283 and skip it */
1284 if (PyUnicode_READ_CHAR(re_unicode_tmp, i_re) == '-') {
1285 re_sign_char = '-';
1286 ++i_re;
1287 --n_re_digits;
1288 }
1289 if (PyUnicode_READ_CHAR(im_unicode_tmp, i_im) == '-') {
1290 im_sign_char = '-';
1291 ++i_im;
1292 --n_im_digits;
1293 }
1294
1295 /* Determine if we have any "remainder" (after the digits, might include
1296 decimal or exponent or both (or neither)) */
Victor Stinnerafbaa202011-09-28 21:50:16 +02001297 parse_number(re_unicode_tmp, i_re, i_re + n_re_digits,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001298 &n_re_remainder, &re_has_decimal);
Victor Stinnerafbaa202011-09-28 21:50:16 +02001299 parse_number(im_unicode_tmp, i_im, i_im + n_im_digits,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001300 &n_im_remainder, &im_has_decimal);
1301
1302 /* Determine the grouping, separator, and decimal point, if any. */
Victor Stinner41a863c2012-02-24 00:37:51 +01001303 if (get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
Eric V. Smith89e1b1a2016-09-09 23:06:47 -04001304 format->thousands_separators,
Victor Stinner41a863c2012-02-24 00:37:51 +01001305 &locale) == -1)
1306 goto done;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001307
1308 /* Turn off any padding. We'll do it later after we've composed
1309 the numbers without padding. */
1310 tmp_format.fill_char = '\0';
1311 tmp_format.align = '<';
1312 tmp_format.width = -1;
1313
1314 /* Calculate how much memory we'll need. */
1315 n_re_total = calc_number_widths(&re_spec, 0, re_sign_char, re_unicode_tmp,
1316 i_re, i_re + n_re_digits, n_re_remainder,
Victor Stinner41a863c2012-02-24 00:37:51 +01001317 re_has_decimal, &locale, &tmp_format,
1318 &maxchar);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001319
1320 /* Same formatting, but always include a sign, unless the real part is
1321 * going to be omitted, in which case we use whatever sign convention was
1322 * requested by the original format. */
1323 if (!skip_re)
1324 tmp_format.sign = '+';
1325 n_im_total = calc_number_widths(&im_spec, 0, im_sign_char, im_unicode_tmp,
1326 i_im, i_im + n_im_digits, n_im_remainder,
Victor Stinner41a863c2012-02-24 00:37:51 +01001327 im_has_decimal, &locale, &tmp_format,
1328 &maxchar);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001329
1330 if (skip_re)
1331 n_re_total = 0;
1332
1333 /* Add 1 for the 'j', and optionally 2 for parens. */
1334 calc_padding(n_re_total + n_im_total + 1 + add_parens * 2,
1335 format->width, format->align, &lpad, &rpad, &total);
1336
Victor Stinner41a863c2012-02-24 00:37:51 +01001337 if (lpad || rpad)
Victor Stinnera4ac6002012-01-21 15:50:49 +01001338 maxchar = Py_MAX(maxchar, format->fill_char);
1339
Victor Stinnerd3f08822012-05-29 12:57:52 +02001340 if (_PyUnicodeWriter_Prepare(writer, total, maxchar) == -1)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001341 goto done;
Victor Stinnerd3f08822012-05-29 12:57:52 +02001342 rkind = writer->kind;
1343 rdata = writer->data;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001344
1345 /* Populate the memory. First, the padding. */
Victor Stinnerd3f08822012-05-29 12:57:52 +02001346 result = fill_padding(writer,
1347 n_re_total + n_im_total + 1 + add_parens * 2,
Eric V. Smith2ea97122014-04-14 11:55:10 -04001348 format->fill_char, lpad, rpad);
Victor Stinnerd3f08822012-05-29 12:57:52 +02001349 if (result == -1)
1350 goto done;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001351
Victor Stinnerd3f08822012-05-29 12:57:52 +02001352 if (add_parens) {
1353 PyUnicode_WRITE(rkind, rdata, writer->pos, '(');
1354 writer->pos++;
1355 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001356
1357 if (!skip_re) {
Victor Stinnerd3f08822012-05-29 12:57:52 +02001358 result = fill_number(writer, &re_spec,
1359 re_unicode_tmp, i_re, i_re + n_re_digits,
1360 NULL, 0,
1361 0,
1362 &locale, 0);
1363 if (result == -1)
Victor Stinnerafbaa202011-09-28 21:50:16 +02001364 goto done;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001365 }
Victor Stinnerd3f08822012-05-29 12:57:52 +02001366 result = fill_number(writer, &im_spec,
1367 im_unicode_tmp, i_im, i_im + n_im_digits,
1368 NULL, 0,
1369 0,
1370 &locale, 0);
1371 if (result == -1)
Victor Stinnerafbaa202011-09-28 21:50:16 +02001372 goto done;
Victor Stinnerd3f08822012-05-29 12:57:52 +02001373 PyUnicode_WRITE(rkind, rdata, writer->pos, 'j');
1374 writer->pos++;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001375
Victor Stinnerd3f08822012-05-29 12:57:52 +02001376 if (add_parens) {
1377 PyUnicode_WRITE(rkind, rdata, writer->pos, ')');
1378 writer->pos++;
1379 }
1380
1381 writer->pos += rpad;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001382
1383done:
1384 PyMem_Free(re_buf);
1385 PyMem_Free(im_buf);
1386 Py_XDECREF(re_unicode_tmp);
1387 Py_XDECREF(im_unicode_tmp);
Victor Stinner41a863c2012-02-24 00:37:51 +01001388 free_locale_info(&locale);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001389 return result;
1390}
1391
1392/************************************************************************/
1393/*********** built in formatters ****************************************/
1394/************************************************************************/
doko@ubuntu.com39378f72012-06-21 12:12:20 +02001395static int
Victor Stinnerd3f08822012-05-29 12:57:52 +02001396format_obj(PyObject *obj, _PyUnicodeWriter *writer)
1397{
1398 PyObject *str;
1399 int err;
1400
1401 str = PyObject_Str(obj);
1402 if (str == NULL)
1403 return -1;
1404 err = _PyUnicodeWriter_WriteStr(writer, str);
1405 Py_DECREF(str);
1406 return err;
1407}
1408
1409int
1410_PyUnicode_FormatAdvancedWriter(_PyUnicodeWriter *writer,
1411 PyObject *obj,
1412 PyObject *format_spec,
1413 Py_ssize_t start, Py_ssize_t end)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001414{
1415 InternalFormatSpec format;
Victor Stinnerd3f08822012-05-29 12:57:52 +02001416
1417 assert(PyUnicode_Check(obj));
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001418
1419 /* check for the special case of zero length format spec, make
1420 it equivalent to str(obj) */
Victor Stinnerd3f08822012-05-29 12:57:52 +02001421 if (start == end) {
1422 if (PyUnicode_CheckExact(obj))
1423 return _PyUnicodeWriter_WriteStr(writer, obj);
1424 else
1425 return format_obj(obj, writer);
1426 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001427
1428 /* parse the format_spec */
1429 if (!parse_internal_render_format_spec(format_spec, start, end,
1430 &format, 's', '<'))
Victor Stinnerd3f08822012-05-29 12:57:52 +02001431 return -1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001432
1433 /* type conversion? */
1434 switch (format.type) {
1435 case 's':
1436 /* no type conversion needed, already a string. do the formatting */
Victor Stinnerd3f08822012-05-29 12:57:52 +02001437 return format_string_internal(obj, &format, writer);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001438 default:
1439 /* unknown */
1440 unknown_presentation_type(format.type, obj->ob_type->tp_name);
Victor Stinnerd3f08822012-05-29 12:57:52 +02001441 return -1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001442 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001443}
1444
Victor Stinnerd3f08822012-05-29 12:57:52 +02001445int
1446_PyLong_FormatAdvancedWriter(_PyUnicodeWriter *writer,
1447 PyObject *obj,
1448 PyObject *format_spec,
1449 Py_ssize_t start, Py_ssize_t end)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001450{
Victor Stinnerd3f08822012-05-29 12:57:52 +02001451 PyObject *tmp = NULL, *str = NULL;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001452 InternalFormatSpec format;
Victor Stinnerd3f08822012-05-29 12:57:52 +02001453 int result = -1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001454
1455 /* check for the special case of zero length format spec, make
1456 it equivalent to str(obj) */
1457 if (start == end) {
Victor Stinnerd3f08822012-05-29 12:57:52 +02001458 if (PyLong_CheckExact(obj))
1459 return _PyLong_FormatWriter(writer, obj, 10, 0);
1460 else
1461 return format_obj(obj, writer);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001462 }
1463
1464 /* parse the format_spec */
1465 if (!parse_internal_render_format_spec(format_spec, start, end,
1466 &format, 'd', '>'))
1467 goto done;
1468
1469 /* type conversion? */
1470 switch (format.type) {
1471 case 'b':
1472 case 'c':
1473 case 'd':
1474 case 'o':
1475 case 'x':
1476 case 'X':
1477 case 'n':
Serhiy Storchaka95949422013-08-27 19:40:23 +03001478 /* no type conversion needed, already an int. do the formatting */
Victor Stinnerd3f08822012-05-29 12:57:52 +02001479 result = format_long_internal(obj, &format, writer);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001480 break;
1481
1482 case 'e':
1483 case 'E':
1484 case 'f':
1485 case 'F':
1486 case 'g':
1487 case 'G':
1488 case '%':
1489 /* convert to float */
1490 tmp = PyNumber_Float(obj);
1491 if (tmp == NULL)
1492 goto done;
Victor Stinnerd3f08822012-05-29 12:57:52 +02001493 result = format_float_internal(tmp, &format, writer);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001494 break;
1495
1496 default:
1497 /* unknown */
1498 unknown_presentation_type(format.type, obj->ob_type->tp_name);
1499 goto done;
1500 }
1501
1502done:
1503 Py_XDECREF(tmp);
Victor Stinnerd3f08822012-05-29 12:57:52 +02001504 Py_XDECREF(str);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001505 return result;
1506}
1507
Victor Stinnerd3f08822012-05-29 12:57:52 +02001508int
1509_PyFloat_FormatAdvancedWriter(_PyUnicodeWriter *writer,
1510 PyObject *obj,
1511 PyObject *format_spec,
1512 Py_ssize_t start, Py_ssize_t end)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001513{
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001514 InternalFormatSpec format;
1515
1516 /* check for the special case of zero length format spec, make
1517 it equivalent to str(obj) */
Victor Stinnerd3f08822012-05-29 12:57:52 +02001518 if (start == end)
1519 return format_obj(obj, writer);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001520
1521 /* parse the format_spec */
1522 if (!parse_internal_render_format_spec(format_spec, start, end,
1523 &format, '\0', '>'))
Victor Stinnerd3f08822012-05-29 12:57:52 +02001524 return -1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001525
1526 /* type conversion? */
1527 switch (format.type) {
1528 case '\0': /* No format code: like 'g', but with at least one decimal. */
1529 case 'e':
1530 case 'E':
1531 case 'f':
1532 case 'F':
1533 case 'g':
1534 case 'G':
1535 case 'n':
1536 case '%':
1537 /* no conversion, already a float. do the formatting */
Victor Stinnerd3f08822012-05-29 12:57:52 +02001538 return format_float_internal(obj, &format, writer);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001539
1540 default:
1541 /* unknown */
1542 unknown_presentation_type(format.type, obj->ob_type->tp_name);
Victor Stinnerd3f08822012-05-29 12:57:52 +02001543 return -1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001544 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001545}
1546
Victor Stinnerd3f08822012-05-29 12:57:52 +02001547int
1548_PyComplex_FormatAdvancedWriter(_PyUnicodeWriter *writer,
1549 PyObject *obj,
1550 PyObject *format_spec,
1551 Py_ssize_t start, Py_ssize_t end)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001552{
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001553 InternalFormatSpec format;
1554
1555 /* check for the special case of zero length format spec, make
1556 it equivalent to str(obj) */
Victor Stinnerd3f08822012-05-29 12:57:52 +02001557 if (start == end)
1558 return format_obj(obj, writer);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001559
1560 /* parse the format_spec */
1561 if (!parse_internal_render_format_spec(format_spec, start, end,
1562 &format, '\0', '>'))
Victor Stinnerd3f08822012-05-29 12:57:52 +02001563 return -1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001564
1565 /* type conversion? */
1566 switch (format.type) {
1567 case '\0': /* No format code: like 'g', but with at least one decimal. */
1568 case 'e':
1569 case 'E':
1570 case 'f':
1571 case 'F':
1572 case 'g':
1573 case 'G':
1574 case 'n':
1575 /* no conversion, already a complex. do the formatting */
Victor Stinnerd3f08822012-05-29 12:57:52 +02001576 return format_complex_internal(obj, &format, writer);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001577
1578 default:
1579 /* unknown */
1580 unknown_presentation_type(format.type, obj->ob_type->tp_name);
Victor Stinnerd3f08822012-05-29 12:57:52 +02001581 return -1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001582 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001583}