blob: ed716a5b971747b78b0db243d21c4512c0196d49 [file] [log] [blame]
/* Implements the unicode (as opposed to string) version of the
   built-in formatters for string, int, float.  That is, the versions
   of int.__format__, etc., that take and return unicode objects. */
4
5#include "Python.h"
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02006#include <locale.h>
7
8/* Raises an exception about an unknown presentation type for this
9 * type. */
10
11static void
12unknown_presentation_type(Py_UCS4 presentation_type,
13 const char* type_name)
14{
15 /* %c might be out-of-range, hence the two cases. */
16 if (presentation_type > 32 && presentation_type < 128)
17 PyErr_Format(PyExc_ValueError,
18 "Unknown format code '%c' "
19 "for object of type '%.200s'",
20 (char)presentation_type,
21 type_name);
22 else
23 PyErr_Format(PyExc_ValueError,
24 "Unknown format code '\\x%x' "
25 "for object of type '%.200s'",
26 (unsigned int)presentation_type,
27 type_name);
28}
29
30static void
31invalid_comma_type(Py_UCS4 presentation_type)
32{
33 if (presentation_type > 32 && presentation_type < 128)
34 PyErr_Format(PyExc_ValueError,
35 "Cannot specify ',' with '%c'.",
36 (char)presentation_type);
37 else
38 PyErr_Format(PyExc_ValueError,
39 "Cannot specify ',' with '\\x%x'.",
40 (unsigned int)presentation_type);
41}
42
43/*
44 get_integer consumes 0 or more decimal digit characters from an
45 input string, updates *result with the corresponding positive
46 integer, and returns the number of digits consumed.
47
48 returns -1 on error.
49*/
50static int
51get_integer(PyObject *str, Py_ssize_t *pos, Py_ssize_t end,
52 Py_ssize_t *result)
53{
Mark Dickinson47862d42011-12-01 15:27:04 +000054 Py_ssize_t accumulator, digitval;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020055 int numdigits;
56 accumulator = numdigits = 0;
57 for (;;(*pos)++, numdigits++) {
58 if (*pos >= end)
59 break;
60 digitval = Py_UNICODE_TODECIMAL(PyUnicode_READ_CHAR(str, *pos));
61 if (digitval < 0)
62 break;
63 /*
Mark Dickinson47862d42011-12-01 15:27:04 +000064 Detect possible overflow before it happens:
65
66 accumulator * 10 + digitval > PY_SSIZE_T_MAX if and only if
67 accumulator > (PY_SSIZE_T_MAX - digitval) / 10.
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020068 */
Mark Dickinson47862d42011-12-01 15:27:04 +000069 if (accumulator > (PY_SSIZE_T_MAX - digitval) / 10) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020070 PyErr_Format(PyExc_ValueError,
71 "Too many decimal digits in format string");
72 return -1;
73 }
Mark Dickinson47862d42011-12-01 15:27:04 +000074 accumulator = accumulator * 10 + digitval;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020075 }
76 *result = accumulator;
77 return numdigits;
78}
79
80/************************************************************************/
81/*********** standard format specifier parsing **************************/
82/************************************************************************/
83
84/* returns true if this character is a specifier alignment token */
85Py_LOCAL_INLINE(int)
86is_alignment_token(Py_UCS4 c)
87{
88 switch (c) {
89 case '<': case '>': case '=': case '^':
90 return 1;
91 default:
92 return 0;
93 }
94}
95
96/* returns true if this character is a sign element */
97Py_LOCAL_INLINE(int)
98is_sign_element(Py_UCS4 c)
99{
100 switch (c) {
101 case ' ': case '+': case '-':
102 return 1;
103 default:
104 return 0;
105 }
106}
Eric Smith8c663262007-08-25 02:26:07 +0000107
Eric Smith4a7d76d2008-05-30 18:10:19 +0000108
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200109typedef struct {
110 Py_UCS4 fill_char;
111 Py_UCS4 align;
112 int alternate;
113 Py_UCS4 sign;
114 Py_ssize_t width;
115 int thousands_separators;
116 Py_ssize_t precision;
117 Py_UCS4 type;
118} InternalFormatSpec;
Eric Smith4a7d76d2008-05-30 18:10:19 +0000119
#if 0
/* Occasionally useful for debugging: dumps every field of a parsed
   format spec to stdout.  Normally compiled out. */
static void
DEBUG_PRINT_FORMAT_SPEC(InternalFormatSpec *format)
{
    printf("internal format spec: fill_char %d\n", format->fill_char);
    printf("internal format spec: align %d\n", format->align);
    printf("internal format spec: alternate %d\n", format->alternate);
    printf("internal format spec: sign %d\n", format->sign);
    printf("internal format spec: width %zd\n", format->width);
    printf("internal format spec: thousands_separators %d\n",
           format->thousands_separators);
    printf("internal format spec: precision %zd\n", format->precision);
    printf("internal format spec: type %c\n", format->type);
    printf("\n");
}
#endif
137
138
139/*
140 ptr points to the start of the format_spec, end points just past its end.
141 fills in format with the parsed information.
142 returns 1 on success, 0 on failure.
143 if failure, sets the exception
144*/
145static int
146parse_internal_render_format_spec(PyObject *format_spec,
147 Py_ssize_t start, Py_ssize_t end,
148 InternalFormatSpec *format,
149 char default_type,
150 char default_align)
151{
152 Py_ssize_t pos = start;
153 /* end-pos is used throughout this code to specify the length of
154 the input string */
155#define READ_spec(index) PyUnicode_READ_CHAR(format_spec, index)
156
157 Py_ssize_t consumed;
158 int align_specified = 0;
159
160 format->fill_char = '\0';
161 format->align = default_align;
162 format->alternate = 0;
163 format->sign = '\0';
164 format->width = -1;
165 format->thousands_separators = 0;
166 format->precision = -1;
167 format->type = default_type;
168
169 /* If the second char is an alignment token,
170 then parse the fill char */
171 if (end-pos >= 2 && is_alignment_token(READ_spec(pos+1))) {
172 format->align = READ_spec(pos+1);
173 format->fill_char = READ_spec(pos);
174 align_specified = 1;
175 pos += 2;
176 }
177 else if (end-pos >= 1 && is_alignment_token(READ_spec(pos))) {
178 format->align = READ_spec(pos);
179 align_specified = 1;
180 ++pos;
181 }
182
183 /* Parse the various sign options */
184 if (end-pos >= 1 && is_sign_element(READ_spec(pos))) {
185 format->sign = READ_spec(pos);
186 ++pos;
187 }
188
189 /* If the next character is #, we're in alternate mode. This only
190 applies to integers. */
191 if (end-pos >= 1 && READ_spec(pos) == '#') {
192 format->alternate = 1;
193 ++pos;
194 }
195
196 /* The special case for 0-padding (backwards compat) */
197 if (format->fill_char == '\0' && end-pos >= 1 && READ_spec(pos) == '0') {
198 format->fill_char = '0';
199 if (!align_specified) {
200 format->align = '=';
201 }
202 ++pos;
203 }
204
205 consumed = get_integer(format_spec, &pos, end, &format->width);
206 if (consumed == -1)
207 /* Overflow error. Exception already set. */
208 return 0;
209
210 /* If consumed is 0, we didn't consume any characters for the
211 width. In that case, reset the width to -1, because
212 get_integer() will have set it to zero. -1 is how we record
213 that the width wasn't specified. */
214 if (consumed == 0)
215 format->width = -1;
216
217 /* Comma signifies add thousands separators */
218 if (end-pos && READ_spec(pos) == ',') {
219 format->thousands_separators = 1;
220 ++pos;
221 }
222
223 /* Parse field precision */
224 if (end-pos && READ_spec(pos) == '.') {
225 ++pos;
226
227 consumed = get_integer(format_spec, &pos, end, &format->precision);
228 if (consumed == -1)
229 /* Overflow error. Exception already set. */
230 return 0;
231
232 /* Not having a precision after a dot is an error. */
233 if (consumed == 0) {
234 PyErr_Format(PyExc_ValueError,
235 "Format specifier missing precision");
236 return 0;
237 }
238
239 }
240
241 /* Finally, parse the type field. */
242
243 if (end-pos > 1) {
Eric V. Smithd25cfe62012-01-19 20:04:28 -0500244 /* More than one char remain, invalid format specifier. */
245 PyErr_Format(PyExc_ValueError, "Invalid format specifier");
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200246 return 0;
247 }
248
249 if (end-pos == 1) {
250 format->type = READ_spec(pos);
251 ++pos;
252 }
253
254 /* Do as much validating as we can, just by looking at the format
255 specifier. Do not take into account what type of formatting
256 we're doing (int, float, string). */
257
258 if (format->thousands_separators) {
259 switch (format->type) {
260 case 'd':
261 case 'e':
262 case 'f':
263 case 'g':
264 case 'E':
265 case 'G':
266 case '%':
267 case 'F':
268 case '\0':
269 /* These are allowed. See PEP 378.*/
270 break;
271 default:
272 invalid_comma_type(format->type);
273 return 0;
274 }
275 }
276
Victor Stinnera4ac6002012-01-21 15:50:49 +0100277 assert (format->align <= 127);
278 assert (format->sign <= 127);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200279 return 1;
280}
281
282/* Calculate the padding needed. */
283static void
284calc_padding(Py_ssize_t nchars, Py_ssize_t width, Py_UCS4 align,
285 Py_ssize_t *n_lpadding, Py_ssize_t *n_rpadding,
286 Py_ssize_t *n_total)
287{
288 if (width >= 0) {
289 if (nchars > width)
290 *n_total = nchars;
291 else
292 *n_total = width;
293 }
294 else {
295 /* not specified, use all of the chars and no more */
296 *n_total = nchars;
297 }
298
299 /* Figure out how much leading space we need, based on the
300 aligning */
301 if (align == '>')
302 *n_lpadding = *n_total - nchars;
303 else if (align == '^')
304 *n_lpadding = (*n_total - nchars) / 2;
305 else if (align == '<' || align == '=')
306 *n_lpadding = 0;
307 else {
308 /* We should never have an unspecified alignment. */
309 *n_lpadding = 0;
310 assert(0);
311 }
312
313 *n_rpadding = *n_total - nchars - *n_lpadding;
314}
315
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200316/* Do the padding, and return a pointer to where the caller-supplied
317 content goes. */
318static Py_ssize_t
319fill_padding(PyObject *s, Py_ssize_t start, Py_ssize_t nchars,
320 Py_UCS4 fill_char, Py_ssize_t n_lpadding,
321 Py_ssize_t n_rpadding)
322{
323 /* Pad on left. */
324 if (n_lpadding)
Victor Stinner3fe55312012-01-04 00:33:50 +0100325 PyUnicode_Fill(s, start, start + n_lpadding, fill_char);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200326
327 /* Pad on right. */
328 if (n_rpadding)
Victor Stinner3fe55312012-01-04 00:33:50 +0100329 PyUnicode_Fill(s, start + nchars + n_lpadding,
330 start + nchars + n_lpadding + n_rpadding, fill_char);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200331
332 /* Pointer to the user content. */
333 return start + n_lpadding;
334}
335
336/************************************************************************/
337/*********** common routines for numeric formatting *********************/
338/************************************************************************/
339
/* Locale type codes, passed as the `type` argument of
   get_locale_info(). */
#define LT_CURRENT_LOCALE 0     /* use the values reported by localeconv() */
#define LT_DEFAULT_LOCALE 1     /* hard-coded: '.' decimal point, ','
                                   separator, groups of 3 */
#define LT_NO_LOCALE 2          /* hard-coded: '.' decimal point, no
                                   grouping */
344
/* Locale info needed for formatting integers and the part of floats
   before and including the decimal.  Note that locales only support
   8-bit chars, not unicode.  The members are filled in by
   get_locale_info(). */
typedef struct {
    char *decimal_point;    /* string written in place of the '.' */
    char *thousands_sep;    /* string inserted between digit groups */
    char *grouping;         /* group-size description, in the format of
                               struct lconv's `grouping` member */
} LocaleInfo;
353
354/* describes the layout for an integer, see the comment in
355 calc_number_widths() for details */
356typedef struct {
357 Py_ssize_t n_lpadding;
358 Py_ssize_t n_prefix;
359 Py_ssize_t n_spadding;
360 Py_ssize_t n_rpadding;
361 char sign;
362 Py_ssize_t n_sign; /* number of digits needed for sign (0/1) */
363 Py_ssize_t n_grouped_digits; /* Space taken up by the digits, including
364 any grouping chars. */
365 Py_ssize_t n_decimal; /* 0 if only an integer */
366 Py_ssize_t n_remainder; /* Digits in decimal and/or exponent part,
367 excluding the decimal itself, if
368 present. */
369
370 /* These 2 are not the widths of fields, but are needed by
371 STRINGLIB_GROUPING. */
372 Py_ssize_t n_digits; /* The number of digits before a decimal
373 or exponent. */
374 Py_ssize_t n_min_width; /* The min_width we used when we computed
375 the n_grouped_digits width. */
376} NumberFieldWidths;
377
378
379/* Given a number of the form:
380 digits[remainder]
381 where ptr points to the start and end points to the end, find where
382 the integer part ends. This could be a decimal, an exponent, both,
383 or neither.
384 If a decimal point is present, set *has_decimal and increment
385 remainder beyond it.
386 Results are undefined (but shouldn't crash) for improperly
387 formatted strings.
388*/
389static void
390parse_number(PyObject *s, Py_ssize_t pos, Py_ssize_t end,
391 Py_ssize_t *n_remainder, int *has_decimal)
392{
393 Py_ssize_t remainder;
394
395 while (pos<end && isdigit(PyUnicode_READ_CHAR(s, pos)))
396 ++pos;
397 remainder = pos;
398
399 /* Does remainder start with a decimal point? */
400 *has_decimal = pos<end && PyUnicode_READ_CHAR(s, remainder) == '.';
401
402 /* Skip the decimal point. */
403 if (*has_decimal)
404 remainder++;
405
406 *n_remainder = end - remainder;
407}
408
409/* not all fields of format are used. for example, precision is
410 unused. should this take discrete params in order to be more clear
411 about what it does? or is passing a single format parameter easier
412 and more efficient enough to justify a little obfuscation? */
413static Py_ssize_t
414calc_number_widths(NumberFieldWidths *spec, Py_ssize_t n_prefix,
415 Py_UCS4 sign_char, PyObject *number, Py_ssize_t n_start,
416 Py_ssize_t n_end, Py_ssize_t n_remainder,
417 int has_decimal, const LocaleInfo *locale,
418 const InternalFormatSpec *format)
419{
420 Py_ssize_t n_non_digit_non_padding;
421 Py_ssize_t n_padding;
422
423 spec->n_digits = n_end - n_start - n_remainder - (has_decimal?1:0);
424 spec->n_lpadding = 0;
425 spec->n_prefix = n_prefix;
426 spec->n_decimal = has_decimal ? strlen(locale->decimal_point) : 0;
427 spec->n_remainder = n_remainder;
428 spec->n_spadding = 0;
429 spec->n_rpadding = 0;
430 spec->sign = '\0';
431 spec->n_sign = 0;
432
433 /* the output will look like:
434 | |
435 | <lpadding> <sign> <prefix> <spadding> <grouped_digits> <decimal> <remainder> <rpadding> |
436 | |
437
438 sign is computed from format->sign and the actual
439 sign of the number
440
441 prefix is given (it's for the '0x' prefix)
442
443 digits is already known
444
445 the total width is either given, or computed from the
446 actual digits
447
448 only one of lpadding, spadding, and rpadding can be non-zero,
449 and it's calculated from the width and other fields
450 */
451
452 /* compute the various parts we're going to write */
453 switch (format->sign) {
454 case '+':
455 /* always put a + or - */
456 spec->n_sign = 1;
457 spec->sign = (sign_char == '-' ? '-' : '+');
458 break;
459 case ' ':
460 spec->n_sign = 1;
461 spec->sign = (sign_char == '-' ? '-' : ' ');
462 break;
463 default:
464 /* Not specified, or the default (-) */
465 if (sign_char == '-') {
466 spec->n_sign = 1;
467 spec->sign = '-';
468 }
469 }
470
471 /* The number of chars used for non-digits and non-padding. */
472 n_non_digit_non_padding = spec->n_sign + spec->n_prefix + spec->n_decimal +
473 spec->n_remainder;
474
475 /* min_width can go negative, that's okay. format->width == -1 means
476 we don't care. */
477 if (format->fill_char == '0' && format->align == '=')
478 spec->n_min_width = format->width - n_non_digit_non_padding;
479 else
480 spec->n_min_width = 0;
481
482 if (spec->n_digits == 0)
483 /* This case only occurs when using 'c' formatting, we need
484 to special case it because the grouping code always wants
485 to have at least one character. */
486 spec->n_grouped_digits = 0;
487 else
488 spec->n_grouped_digits = _PyUnicode_InsertThousandsGrouping(
Victor Stinnerc3cec782011-10-05 21:24:08 +0200489 NULL, PyUnicode_1BYTE_KIND, NULL, 0, NULL,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200490 spec->n_digits, spec->n_min_width,
491 locale->grouping, locale->thousands_sep);
492
493 /* Given the desired width and the total of digit and non-digit
494 space we consume, see if we need any padding. format->width can
495 be negative (meaning no padding), but this code still works in
496 that case. */
497 n_padding = format->width -
498 (n_non_digit_non_padding + spec->n_grouped_digits);
499 if (n_padding > 0) {
500 /* Some padding is needed. Determine if it's left, space, or right. */
501 switch (format->align) {
502 case '<':
503 spec->n_rpadding = n_padding;
504 break;
505 case '^':
506 spec->n_lpadding = n_padding / 2;
507 spec->n_rpadding = n_padding - spec->n_lpadding;
508 break;
509 case '=':
510 spec->n_spadding = n_padding;
511 break;
512 case '>':
513 spec->n_lpadding = n_padding;
514 break;
515 default:
516 /* Shouldn't get here, but treat it as '>' */
517 spec->n_lpadding = n_padding;
518 assert(0);
519 break;
520 }
521 }
522 return spec->n_lpadding + spec->n_sign + spec->n_prefix +
523 spec->n_spadding + spec->n_grouped_digits + spec->n_decimal +
524 spec->n_remainder + spec->n_rpadding;
525}
526
527/* Fill in the digit parts of a numbers's string representation,
528 as determined in calc_number_widths().
Victor Stinnerafbaa202011-09-28 21:50:16 +0200529 Return -1 on error, or 0 on success. */
530static int
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200531fill_number(PyObject *out, Py_ssize_t pos, const NumberFieldWidths *spec,
532 PyObject *digits, Py_ssize_t d_start, Py_ssize_t d_end,
Victor Stinnerafbaa202011-09-28 21:50:16 +0200533 PyObject *prefix, Py_ssize_t p_start,
534 Py_UCS4 fill_char,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200535 LocaleInfo *locale, int toupper)
536{
537 /* Used to keep track of digits, decimal, and remainder. */
538 Py_ssize_t d_pos = d_start;
539 unsigned int kind = PyUnicode_KIND(out);
540 void *data = PyUnicode_DATA(out);
541
542#ifndef NDEBUG
543 Py_ssize_t r;
544#endif
545
546 if (spec->n_lpadding) {
Victor Stinner3fe55312012-01-04 00:33:50 +0100547 PyUnicode_Fill(out, pos, pos + spec->n_lpadding, fill_char);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200548 pos += spec->n_lpadding;
549 }
550 if (spec->n_sign == 1) {
551 PyUnicode_WRITE(kind, data, pos++, spec->sign);
552 }
553 if (spec->n_prefix) {
Victor Stinnerfd85c3a2011-09-28 21:53:49 +0200554 if (PyUnicode_CopyCharacters(out, pos,
555 prefix, p_start,
556 spec->n_prefix) < 0)
557 return -1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200558 if (toupper) {
559 Py_ssize_t t;
Benjamin Peterson21e0da22012-01-11 21:00:42 -0500560 for (t = 0; t < spec->n_prefix; t++) {
561 Py_UCS4 c = PyUnicode_READ(kind, data, pos + t);
Victor Stinnered277852012-02-01 00:22:23 +0100562 c = Py_TOUPPER(c);
Victor Stinnera4ac6002012-01-21 15:50:49 +0100563 assert (c <= 127);
Victor Stinnered277852012-02-01 00:22:23 +0100564 PyUnicode_WRITE(kind, data, pos + t, c);
Benjamin Peterson21e0da22012-01-11 21:00:42 -0500565 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200566 }
567 pos += spec->n_prefix;
568 }
569 if (spec->n_spadding) {
Victor Stinner3fe55312012-01-04 00:33:50 +0100570 PyUnicode_Fill(out, pos, pos + spec->n_spadding, fill_char);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200571 pos += spec->n_spadding;
572 }
573
574 /* Only for type 'c' special case, it has no digits. */
575 if (spec->n_digits != 0) {
576 /* Fill the digits with InsertThousandsGrouping. */
Victor Stinnerdba2dee2011-09-28 21:50:42 +0200577 char *pdigits;
578 if (PyUnicode_READY(digits))
579 return -1;
580 pdigits = PyUnicode_DATA(digits);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200581 if (PyUnicode_KIND(digits) < kind) {
582 pdigits = _PyUnicode_AsKind(digits, kind);
Victor Stinnerafbaa202011-09-28 21:50:16 +0200583 if (pdigits == NULL)
584 return -1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200585 }
586#ifndef NDEBUG
587 r =
588#endif
589 _PyUnicode_InsertThousandsGrouping(
Victor Stinnerc3cec782011-10-05 21:24:08 +0200590 out, kind,
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200591 (char*)data + kind * pos,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200592 spec->n_grouped_digits,
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200593 pdigits + kind * d_pos,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200594 spec->n_digits, spec->n_min_width,
595 locale->grouping, locale->thousands_sep);
596#ifndef NDEBUG
597 assert(r == spec->n_grouped_digits);
598#endif
599 if (PyUnicode_KIND(digits) < kind)
600 PyMem_Free(pdigits);
601 d_pos += spec->n_digits;
602 }
603 if (toupper) {
604 Py_ssize_t t;
Benjamin Peterson21e0da22012-01-11 21:00:42 -0500605 for (t = 0; t < spec->n_grouped_digits; t++) {
606 Py_UCS4 c = PyUnicode_READ(kind, data, pos + t);
Victor Stinnered277852012-02-01 00:22:23 +0100607 c = Py_TOUPPER(c);
Benjamin Peterson21e0da22012-01-11 21:00:42 -0500608 if (c > 127) {
609 PyErr_SetString(PyExc_SystemError, "non-ascii grouped digit");
610 return -1;
611 }
Victor Stinnered277852012-02-01 00:22:23 +0100612 PyUnicode_WRITE(kind, data, pos + t, c);
Benjamin Peterson21e0da22012-01-11 21:00:42 -0500613 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200614 }
615 pos += spec->n_grouped_digits;
616
617 if (spec->n_decimal) {
618 Py_ssize_t t;
619 for (t = 0; t < spec->n_decimal; ++t)
620 PyUnicode_WRITE(kind, data, pos + t,
621 locale->decimal_point[t]);
622 pos += spec->n_decimal;
623 d_pos += 1;
624 }
625
626 if (spec->n_remainder) {
Victor Stinnerfd85c3a2011-09-28 21:53:49 +0200627 if (PyUnicode_CopyCharacters(out, pos, digits, d_pos, spec->n_remainder) < 0)
628 return -1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200629 pos += spec->n_remainder;
630 d_pos += spec->n_remainder;
631 }
632
633 if (spec->n_rpadding) {
Victor Stinner3fe55312012-01-04 00:33:50 +0100634 PyUnicode_Fill(out, pos, pos + spec->n_rpadding, fill_char);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200635 pos += spec->n_rpadding;
636 }
Victor Stinnerafbaa202011-09-28 21:50:16 +0200637 return 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200638}
639
/* Grouping description used when no digit grouping is wanted: its only
   element is CHAR_MAX. */
static char no_grouping[1] = { CHAR_MAX };
641
642/* Find the decimal point character(s?), thousands_separator(s?), and
643 grouping description, either for the current locale if type is
644 LT_CURRENT_LOCALE, a hard-coded locale if LT_DEFAULT_LOCALE, or
645 none if LT_NO_LOCALE. */
646static void
647get_locale_info(int type, LocaleInfo *locale_info)
648{
649 switch (type) {
650 case LT_CURRENT_LOCALE: {
651 struct lconv *locale_data = localeconv();
652 locale_info->decimal_point = locale_data->decimal_point;
653 locale_info->thousands_sep = locale_data->thousands_sep;
654 locale_info->grouping = locale_data->grouping;
655 break;
656 }
657 case LT_DEFAULT_LOCALE:
658 locale_info->decimal_point = ".";
659 locale_info->thousands_sep = ",";
660 locale_info->grouping = "\3"; /* Group every 3 characters. The
661 (implicit) trailing 0 means repeat
662 infinitely. */
663 break;
664 case LT_NO_LOCALE:
665 locale_info->decimal_point = ".";
666 locale_info->thousands_sep = "";
667 locale_info->grouping = no_grouping;
668 break;
669 default:
670 assert(0);
671 }
672}
673
674/************************************************************************/
675/*********** string formatting ******************************************/
676/************************************************************************/
677
678static PyObject *
679format_string_internal(PyObject *value, const InternalFormatSpec *format)
680{
681 Py_ssize_t lpad;
682 Py_ssize_t rpad;
683 Py_ssize_t total;
684 Py_ssize_t pos;
Victor Stinnerc4f281e2011-10-11 22:11:42 +0200685 Py_ssize_t len = PyUnicode_GET_LENGTH(value);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200686 PyObject *result = NULL;
Amaury Forgeot d'Arccd27df32012-01-23 22:42:19 +0100687 Py_UCS4 maxchar = 127;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200688
689 /* sign is not allowed on strings */
690 if (format->sign != '\0') {
691 PyErr_SetString(PyExc_ValueError,
692 "Sign not allowed in string format specifier");
693 goto done;
694 }
695
696 /* alternate is not allowed on strings */
697 if (format->alternate) {
698 PyErr_SetString(PyExc_ValueError,
699 "Alternate form (#) not allowed in string format "
700 "specifier");
701 goto done;
702 }
703
704 /* '=' alignment not allowed on strings */
705 if (format->align == '=') {
706 PyErr_SetString(PyExc_ValueError,
707 "'=' alignment not allowed "
708 "in string format specifier");
709 goto done;
710 }
711
712 /* if precision is specified, output no more that format.precision
713 characters */
714 if (format->precision >= 0 && len >= format->precision) {
715 len = format->precision;
716 }
717
718 calc_padding(len, format->width, format->align, &lpad, &rpad, &total);
719
Victor Stinnera4ac6002012-01-21 15:50:49 +0100720 if (lpad != 0 || rpad != 0)
721 maxchar = Py_MAX(maxchar, format->fill_char);
722
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200723 /* allocate the resulting string */
724 result = PyUnicode_New(total, maxchar);
725 if (result == NULL)
726 goto done;
727
728 /* Write into that space. First the padding. */
729 pos = fill_padding(result, 0, len,
730 format->fill_char=='\0'?' ':format->fill_char,
731 lpad, rpad);
732
733 /* Then the source string. */
Victor Stinnerfd85c3a2011-09-28 21:53:49 +0200734 if (PyUnicode_CopyCharacters(result, pos, value, 0, len) < 0)
735 Py_CLEAR(result);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200736
737done:
Victor Stinnered277852012-02-01 00:22:23 +0100738 assert(!result || _PyUnicode_CheckConsistency(result, 1));
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200739 return result;
740}
741
742
743/************************************************************************/
744/*********** long formatting ********************************************/
745/************************************************************************/
746
747typedef PyObject*
748(*IntOrLongToString)(PyObject *value, int base);
749
750static PyObject *
751format_int_or_long_internal(PyObject *value, const InternalFormatSpec *format,
752 IntOrLongToString tostring)
753{
754 PyObject *result = NULL;
Amaury Forgeot d'Arccd27df32012-01-23 22:42:19 +0100755 Py_UCS4 maxchar = 127;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200756 PyObject *tmp = NULL;
757 Py_ssize_t inumeric_chars;
758 Py_UCS4 sign_char = '\0';
759 Py_ssize_t n_digits; /* count of digits need from the computed
760 string */
761 Py_ssize_t n_remainder = 0; /* Used only for 'c' formatting, which
762 produces non-digits */
763 Py_ssize_t n_prefix = 0; /* Count of prefix chars, (e.g., '0x') */
764 Py_ssize_t n_total;
Victor Stinnered277852012-02-01 00:22:23 +0100765 Py_ssize_t prefix = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200766 NumberFieldWidths spec;
767 long x;
Victor Stinnerafbaa202011-09-28 21:50:16 +0200768 int err;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200769
770 /* Locale settings, either from the actual locale or
771 from a hard-code pseudo-locale */
772 LocaleInfo locale;
773
774 /* no precision allowed on integers */
775 if (format->precision != -1) {
776 PyErr_SetString(PyExc_ValueError,
777 "Precision not allowed in integer format specifier");
778 goto done;
779 }
780
781 /* special case for character formatting */
782 if (format->type == 'c') {
783 /* error to specify a sign */
784 if (format->sign != '\0') {
785 PyErr_SetString(PyExc_ValueError,
786 "Sign not allowed with integer"
787 " format specifier 'c'");
788 goto done;
789 }
790
791 /* taken from unicodeobject.c formatchar() */
792 /* Integer input truncated to a character */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200793 x = PyLong_AsLong(value);
794 if (x == -1 && PyErr_Occurred())
795 goto done;
796 if (x < 0 || x > 0x10ffff) {
797 PyErr_SetString(PyExc_OverflowError,
Victor Stinnera4ac6002012-01-21 15:50:49 +0100798 "%c arg not in range(0x110000)");
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200799 goto done;
800 }
801 tmp = PyUnicode_FromOrdinal(x);
802 inumeric_chars = 0;
803 n_digits = 1;
Amaury Forgeot d'Arc6d766fc2012-01-23 23:20:43 +0100804 maxchar = Py_MAX(maxchar, (Py_UCS4)x);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200805
806 /* As a sort-of hack, we tell calc_number_widths that we only
807 have "remainder" characters. calc_number_widths thinks
808 these are characters that don't get formatted, only copied
809 into the output string. We do this for 'c' formatting,
810 because the characters are likely to be non-digits. */
811 n_remainder = 1;
812 }
813 else {
814 int base;
815 int leading_chars_to_skip = 0; /* Number of characters added by
816 PyNumber_ToBase that we want to
817 skip over. */
818
819 /* Compute the base and how many characters will be added by
820 PyNumber_ToBase */
821 switch (format->type) {
822 case 'b':
823 base = 2;
824 leading_chars_to_skip = 2; /* 0b */
825 break;
826 case 'o':
827 base = 8;
828 leading_chars_to_skip = 2; /* 0o */
829 break;
830 case 'x':
831 case 'X':
832 base = 16;
833 leading_chars_to_skip = 2; /* 0x */
834 break;
835 default: /* shouldn't be needed, but stops a compiler warning */
836 case 'd':
837 case 'n':
838 base = 10;
839 break;
840 }
841
842 /* The number of prefix chars is the same as the leading
843 chars to skip */
844 if (format->alternate)
845 n_prefix = leading_chars_to_skip;
846
847 /* Do the hard part, converting to a string in a given base */
848 tmp = tostring(value, base);
849 if (tmp == NULL || PyUnicode_READY(tmp) == -1)
850 goto done;
851
852 inumeric_chars = 0;
853 n_digits = PyUnicode_GET_LENGTH(tmp);
854
855 prefix = inumeric_chars;
856
857 /* Is a sign character present in the output? If so, remember it
858 and skip it */
859 if (PyUnicode_READ_CHAR(tmp, inumeric_chars) == '-') {
860 sign_char = '-';
861 ++prefix;
862 ++leading_chars_to_skip;
863 }
864
865 /* Skip over the leading chars (0x, 0b, etc.) */
866 n_digits -= leading_chars_to_skip;
867 inumeric_chars += leading_chars_to_skip;
868 }
869
870 /* Determine the grouping, separator, and decimal point, if any. */
871 get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
872 (format->thousands_separators ?
873 LT_DEFAULT_LOCALE :
874 LT_NO_LOCALE),
875 &locale);
876
877 /* Calculate how much memory we'll need. */
878 n_total = calc_number_widths(&spec, n_prefix, sign_char, tmp, inumeric_chars,
879 inumeric_chars + n_digits, n_remainder, 0, &locale, format);
880
Victor Stinnera4ac6002012-01-21 15:50:49 +0100881 if (spec.n_lpadding || spec.n_spadding || spec.n_rpadding)
882 maxchar = Py_MAX(maxchar, format->fill_char);
883
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200884 /* Allocate the memory. */
885 result = PyUnicode_New(n_total, maxchar);
886 if (!result)
887 goto done;
888
889 /* Populate the memory. */
Victor Stinnerafbaa202011-09-28 21:50:16 +0200890 err = fill_number(result, 0, &spec,
891 tmp, inumeric_chars, inumeric_chars + n_digits,
892 tmp, prefix,
893 format->fill_char == '\0' ? ' ' : format->fill_char,
894 &locale, format->type == 'X');
895 if (err)
896 Py_CLEAR(result);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200897
898done:
899 Py_XDECREF(tmp);
Victor Stinnered277852012-02-01 00:22:23 +0100900 assert(!result || _PyUnicode_CheckConsistency(result, 1));
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200901 return result;
902}
903
904/************************************************************************/
905/*********** float formatting *******************************************/
906/************************************************************************/
907
908static PyObject*
909strtounicode(char *charbuffer, Py_ssize_t len)
910{
911 return PyUnicode_FromKindAndData(PyUnicode_1BYTE_KIND, charbuffer, len);
912}
913
914/* much of this is taken from unicodeobject.c */
915static PyObject *
916format_float_internal(PyObject *value,
917 const InternalFormatSpec *format)
918{
919 char *buf = NULL; /* buffer returned from PyOS_double_to_string */
920 Py_ssize_t n_digits;
921 Py_ssize_t n_remainder;
922 Py_ssize_t n_total;
923 int has_decimal;
924 double val;
925 Py_ssize_t precision = format->precision;
926 Py_ssize_t default_precision = 6;
927 Py_UCS4 type = format->type;
928 int add_pct = 0;
929 Py_ssize_t index;
930 NumberFieldWidths spec;
931 int flags = 0;
932 PyObject *result = NULL;
Amaury Forgeot d'Arccd27df32012-01-23 22:42:19 +0100933 Py_UCS4 maxchar = 127;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200934 Py_UCS4 sign_char = '\0';
935 int float_type; /* Used to see if we have a nan, inf, or regular float. */
936 PyObject *unicode_tmp = NULL;
Victor Stinnerafbaa202011-09-28 21:50:16 +0200937 int err;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200938
939 /* Locale settings, either from the actual locale or
940 from a hard-code pseudo-locale */
941 LocaleInfo locale;
942
943 if (format->alternate)
944 flags |= Py_DTSF_ALT;
945
946 if (type == '\0') {
947 /* Omitted type specifier. Behaves in the same way as repr(x)
948 and str(x) if no precision is given, else like 'g', but with
949 at least one digit after the decimal point. */
950 flags |= Py_DTSF_ADD_DOT_0;
951 type = 'r';
952 default_precision = 0;
953 }
954
955 if (type == 'n')
956 /* 'n' is the same as 'g', except for the locale used to
957 format the result. We take care of that later. */
958 type = 'g';
959
960 val = PyFloat_AsDouble(value);
961 if (val == -1.0 && PyErr_Occurred())
962 goto done;
963
964 if (type == '%') {
965 type = 'f';
966 val *= 100;
967 add_pct = 1;
968 }
969
970 if (precision < 0)
971 precision = default_precision;
972 else if (type == 'r')
973 type = 'g';
974
975 /* Cast "type", because if we're in unicode we need to pass a
976 8-bit char. This is safe, because we've restricted what "type"
977 can be. */
978 buf = PyOS_double_to_string(val, (char)type, precision, flags,
979 &float_type);
980 if (buf == NULL)
981 goto done;
982 n_digits = strlen(buf);
983
984 if (add_pct) {
985 /* We know that buf has a trailing zero (since we just called
986 strlen() on it), and we don't use that fact any more. So we
987 can just write over the trailing zero. */
988 buf[n_digits] = '%';
989 n_digits += 1;
990 }
991
992 /* Since there is no unicode version of PyOS_double_to_string,
993 just use the 8 bit version and then convert to unicode. */
994 unicode_tmp = strtounicode(buf, n_digits);
995 if (unicode_tmp == NULL)
996 goto done;
997 index = 0;
998
999 /* Is a sign character present in the output? If so, remember it
1000 and skip it */
1001 if (PyUnicode_READ_CHAR(unicode_tmp, index) == '-') {
1002 sign_char = '-';
1003 ++index;
1004 --n_digits;
1005 }
1006
1007 /* Determine if we have any "remainder" (after the digits, might include
1008 decimal or exponent or both (or neither)) */
1009 parse_number(unicode_tmp, index, index + n_digits, &n_remainder, &has_decimal);
1010
1011 /* Determine the grouping, separator, and decimal point, if any. */
1012 get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
1013 (format->thousands_separators ?
1014 LT_DEFAULT_LOCALE :
1015 LT_NO_LOCALE),
1016 &locale);
1017
1018 /* Calculate how much memory we'll need. */
Victor Stinnerafbaa202011-09-28 21:50:16 +02001019 n_total = calc_number_widths(&spec, 0, sign_char, unicode_tmp, index,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001020 index + n_digits, n_remainder, has_decimal,
1021 &locale, format);
1022
Victor Stinnera4ac6002012-01-21 15:50:49 +01001023 if (spec.n_lpadding || spec.n_spadding || spec.n_rpadding)
1024 maxchar = Py_MAX(maxchar, format->fill_char);
1025
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001026 /* Allocate the memory. */
1027 result = PyUnicode_New(n_total, maxchar);
1028 if (result == NULL)
1029 goto done;
1030
1031 /* Populate the memory. */
Victor Stinnerafbaa202011-09-28 21:50:16 +02001032 err = fill_number(result, 0, &spec,
1033 unicode_tmp, index, index + n_digits,
1034 NULL, 0,
1035 format->fill_char == '\0' ? ' ' : format->fill_char,
1036 &locale, 0);
1037 if (err)
1038 Py_CLEAR(result);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001039
1040done:
1041 PyMem_Free(buf);
1042 Py_DECREF(unicode_tmp);
Victor Stinnered277852012-02-01 00:22:23 +01001043 assert(!result || _PyUnicode_CheckConsistency(result, 1));
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001044 return result;
1045}
1046
1047/************************************************************************/
1048/*********** complex formatting *****************************************/
1049/************************************************************************/
1050
1051static PyObject *
1052format_complex_internal(PyObject *value,
1053 const InternalFormatSpec *format)
1054{
1055 double re;
1056 double im;
1057 char *re_buf = NULL; /* buffer returned from PyOS_double_to_string */
1058 char *im_buf = NULL; /* buffer returned from PyOS_double_to_string */
1059
1060 InternalFormatSpec tmp_format = *format;
1061 Py_ssize_t n_re_digits;
1062 Py_ssize_t n_im_digits;
1063 Py_ssize_t n_re_remainder;
1064 Py_ssize_t n_im_remainder;
1065 Py_ssize_t n_re_total;
1066 Py_ssize_t n_im_total;
1067 int re_has_decimal;
1068 int im_has_decimal;
1069 Py_ssize_t precision = format->precision;
1070 Py_ssize_t default_precision = 6;
1071 Py_UCS4 type = format->type;
1072 Py_ssize_t i_re;
1073 Py_ssize_t i_im;
1074 NumberFieldWidths re_spec;
1075 NumberFieldWidths im_spec;
1076 int flags = 0;
1077 PyObject *result = NULL;
Amaury Forgeot d'Arccd27df32012-01-23 22:42:19 +01001078 Py_UCS4 maxchar = 127;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001079 int rkind;
1080 void *rdata;
1081 Py_ssize_t index;
1082 Py_UCS4 re_sign_char = '\0';
1083 Py_UCS4 im_sign_char = '\0';
1084 int re_float_type; /* Used to see if we have a nan, inf, or regular float. */
1085 int im_float_type;
1086 int add_parens = 0;
1087 int skip_re = 0;
1088 Py_ssize_t lpad;
1089 Py_ssize_t rpad;
1090 Py_ssize_t total;
1091 PyObject *re_unicode_tmp = NULL;
1092 PyObject *im_unicode_tmp = NULL;
Victor Stinnerafbaa202011-09-28 21:50:16 +02001093 int err;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001094
1095 /* Locale settings, either from the actual locale or
1096 from a hard-code pseudo-locale */
1097 LocaleInfo locale;
1098
1099 /* Zero padding is not allowed. */
1100 if (format->fill_char == '0') {
1101 PyErr_SetString(PyExc_ValueError,
1102 "Zero padding is not allowed in complex format "
1103 "specifier");
1104 goto done;
1105 }
1106
1107 /* Neither is '=' alignment . */
1108 if (format->align == '=') {
1109 PyErr_SetString(PyExc_ValueError,
1110 "'=' alignment flag is not allowed in complex format "
1111 "specifier");
1112 goto done;
1113 }
1114
1115 re = PyComplex_RealAsDouble(value);
1116 if (re == -1.0 && PyErr_Occurred())
1117 goto done;
1118 im = PyComplex_ImagAsDouble(value);
1119 if (im == -1.0 && PyErr_Occurred())
1120 goto done;
1121
1122 if (format->alternate)
1123 flags |= Py_DTSF_ALT;
1124
1125 if (type == '\0') {
1126 /* Omitted type specifier. Should be like str(self). */
1127 type = 'r';
1128 default_precision = 0;
1129 if (re == 0.0 && copysign(1.0, re) == 1.0)
1130 skip_re = 1;
1131 else
1132 add_parens = 1;
1133 }
1134
1135 if (type == 'n')
1136 /* 'n' is the same as 'g', except for the locale used to
1137 format the result. We take care of that later. */
1138 type = 'g';
1139
1140 if (precision < 0)
1141 precision = default_precision;
1142 else if (type == 'r')
1143 type = 'g';
1144
1145 /* Cast "type", because if we're in unicode we need to pass a
1146 8-bit char. This is safe, because we've restricted what "type"
1147 can be. */
1148 re_buf = PyOS_double_to_string(re, (char)type, precision, flags,
1149 &re_float_type);
1150 if (re_buf == NULL)
1151 goto done;
1152 im_buf = PyOS_double_to_string(im, (char)type, precision, flags,
1153 &im_float_type);
1154 if (im_buf == NULL)
1155 goto done;
1156
1157 n_re_digits = strlen(re_buf);
1158 n_im_digits = strlen(im_buf);
1159
1160 /* Since there is no unicode version of PyOS_double_to_string,
1161 just use the 8 bit version and then convert to unicode. */
1162 re_unicode_tmp = strtounicode(re_buf, n_re_digits);
1163 if (re_unicode_tmp == NULL)
1164 goto done;
1165 i_re = 0;
1166
1167 im_unicode_tmp = strtounicode(im_buf, n_im_digits);
1168 if (im_unicode_tmp == NULL)
1169 goto done;
1170 i_im = 0;
1171
1172 /* Is a sign character present in the output? If so, remember it
1173 and skip it */
1174 if (PyUnicode_READ_CHAR(re_unicode_tmp, i_re) == '-') {
1175 re_sign_char = '-';
1176 ++i_re;
1177 --n_re_digits;
1178 }
1179 if (PyUnicode_READ_CHAR(im_unicode_tmp, i_im) == '-') {
1180 im_sign_char = '-';
1181 ++i_im;
1182 --n_im_digits;
1183 }
1184
1185 /* Determine if we have any "remainder" (after the digits, might include
1186 decimal or exponent or both (or neither)) */
Victor Stinnerafbaa202011-09-28 21:50:16 +02001187 parse_number(re_unicode_tmp, i_re, i_re + n_re_digits,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001188 &n_re_remainder, &re_has_decimal);
Victor Stinnerafbaa202011-09-28 21:50:16 +02001189 parse_number(im_unicode_tmp, i_im, i_im + n_im_digits,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001190 &n_im_remainder, &im_has_decimal);
1191
1192 /* Determine the grouping, separator, and decimal point, if any. */
1193 get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
1194 (format->thousands_separators ?
1195 LT_DEFAULT_LOCALE :
1196 LT_NO_LOCALE),
1197 &locale);
1198
1199 /* Turn off any padding. We'll do it later after we've composed
1200 the numbers without padding. */
1201 tmp_format.fill_char = '\0';
1202 tmp_format.align = '<';
1203 tmp_format.width = -1;
1204
1205 /* Calculate how much memory we'll need. */
1206 n_re_total = calc_number_widths(&re_spec, 0, re_sign_char, re_unicode_tmp,
1207 i_re, i_re + n_re_digits, n_re_remainder,
1208 re_has_decimal, &locale, &tmp_format);
1209
1210 /* Same formatting, but always include a sign, unless the real part is
1211 * going to be omitted, in which case we use whatever sign convention was
1212 * requested by the original format. */
1213 if (!skip_re)
1214 tmp_format.sign = '+';
1215 n_im_total = calc_number_widths(&im_spec, 0, im_sign_char, im_unicode_tmp,
1216 i_im, i_im + n_im_digits, n_im_remainder,
1217 im_has_decimal, &locale, &tmp_format);
1218
1219 if (skip_re)
1220 n_re_total = 0;
1221
1222 /* Add 1 for the 'j', and optionally 2 for parens. */
1223 calc_padding(n_re_total + n_im_total + 1 + add_parens * 2,
1224 format->width, format->align, &lpad, &rpad, &total);
1225
Victor Stinnera4ac6002012-01-21 15:50:49 +01001226 if (re_spec.n_lpadding || re_spec.n_spadding || re_spec.n_rpadding
1227 || im_spec.n_lpadding || im_spec.n_spadding || im_spec.n_rpadding
1228 || lpad || rpad)
1229 maxchar = Py_MAX(maxchar, format->fill_char);
1230
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001231 result = PyUnicode_New(total, maxchar);
1232 if (result == NULL)
1233 goto done;
1234 rkind = PyUnicode_KIND(result);
1235 rdata = PyUnicode_DATA(result);
1236
1237 /* Populate the memory. First, the padding. */
1238 index = fill_padding(result, 0,
1239 n_re_total + n_im_total + 1 + add_parens * 2,
1240 format->fill_char=='\0' ? ' ' : format->fill_char,
1241 lpad, rpad);
1242
1243 if (add_parens)
1244 PyUnicode_WRITE(rkind, rdata, index++, '(');
1245
1246 if (!skip_re) {
Victor Stinnerafbaa202011-09-28 21:50:16 +02001247 err = fill_number(result, index, &re_spec,
1248 re_unicode_tmp, i_re, i_re + n_re_digits,
1249 NULL, 0,
1250 0,
1251 &locale, 0);
1252 if (err) {
1253 Py_CLEAR(result);
1254 goto done;
1255 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001256 index += n_re_total;
1257 }
Victor Stinnerafbaa202011-09-28 21:50:16 +02001258 err = fill_number(result, index, &im_spec,
1259 im_unicode_tmp, i_im, i_im + n_im_digits,
1260 NULL, 0,
1261 0,
1262 &locale, 0);
1263 if (err) {
1264 Py_CLEAR(result);
1265 goto done;
1266 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001267 index += n_im_total;
1268 PyUnicode_WRITE(rkind, rdata, index++, 'j');
1269
1270 if (add_parens)
1271 PyUnicode_WRITE(rkind, rdata, index++, ')');
1272
1273done:
1274 PyMem_Free(re_buf);
1275 PyMem_Free(im_buf);
1276 Py_XDECREF(re_unicode_tmp);
1277 Py_XDECREF(im_unicode_tmp);
Victor Stinnered277852012-02-01 00:22:23 +01001278 assert(!result || _PyUnicode_CheckConsistency(result, 1));
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001279 return result;
1280}
1281
1282/************************************************************************/
1283/*********** built in formatters ****************************************/
1284/************************************************************************/
1285PyObject *
1286_PyUnicode_FormatAdvanced(PyObject *obj,
1287 PyObject *format_spec,
1288 Py_ssize_t start, Py_ssize_t end)
1289{
1290 InternalFormatSpec format;
Victor Stinnerfb9ea8c2011-10-06 01:45:57 +02001291 PyObject *result;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001292
1293 /* check for the special case of zero length format spec, make
1294 it equivalent to str(obj) */
Victor Stinnerfb9ea8c2011-10-06 01:45:57 +02001295 if (start == end)
1296 return PyObject_Str(obj);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001297
1298 /* parse the format_spec */
1299 if (!parse_internal_render_format_spec(format_spec, start, end,
1300 &format, 's', '<'))
Victor Stinnerfb9ea8c2011-10-06 01:45:57 +02001301 return NULL;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001302
1303 /* type conversion? */
1304 switch (format.type) {
1305 case 's':
1306 /* no type conversion needed, already a string. do the formatting */
1307 result = format_string_internal(obj, &format);
Victor Stinnerfb9ea8c2011-10-06 01:45:57 +02001308 if (result != NULL)
1309 assert(_PyUnicode_CheckConsistency(result, 1));
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001310 break;
1311 default:
1312 /* unknown */
1313 unknown_presentation_type(format.type, obj->ob_type->tp_name);
Victor Stinnerfb9ea8c2011-10-06 01:45:57 +02001314 result = NULL;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001315 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001316 return result;
1317}
1318
1319static PyObject*
1320format_int_or_long(PyObject* obj, PyObject* format_spec,
1321 Py_ssize_t start, Py_ssize_t end,
1322 IntOrLongToString tostring)
1323{
1324 PyObject *result = NULL;
1325 PyObject *tmp = NULL;
1326 InternalFormatSpec format;
1327
1328 /* check for the special case of zero length format spec, make
1329 it equivalent to str(obj) */
1330 if (start == end) {
1331 result = PyObject_Str(obj);
1332 goto done;
1333 }
1334
1335 /* parse the format_spec */
1336 if (!parse_internal_render_format_spec(format_spec, start, end,
1337 &format, 'd', '>'))
1338 goto done;
1339
1340 /* type conversion? */
1341 switch (format.type) {
1342 case 'b':
1343 case 'c':
1344 case 'd':
1345 case 'o':
1346 case 'x':
1347 case 'X':
1348 case 'n':
1349 /* no type conversion needed, already an int (or long). do
1350 the formatting */
1351 result = format_int_or_long_internal(obj, &format, tostring);
1352 break;
1353
1354 case 'e':
1355 case 'E':
1356 case 'f':
1357 case 'F':
1358 case 'g':
1359 case 'G':
1360 case '%':
1361 /* convert to float */
1362 tmp = PyNumber_Float(obj);
1363 if (tmp == NULL)
1364 goto done;
1365 result = format_float_internal(tmp, &format);
1366 break;
1367
1368 default:
1369 /* unknown */
1370 unknown_presentation_type(format.type, obj->ob_type->tp_name);
1371 goto done;
1372 }
1373
1374done:
1375 Py_XDECREF(tmp);
1376 return result;
1377}
1378
1379/* Need to define long_format as a function that will convert a long
1380 to a string. In 3.0, _PyLong_Format has the correct signature. */
1381#define long_format _PyLong_Format
1382
1383PyObject *
1384_PyLong_FormatAdvanced(PyObject *obj,
1385 PyObject *format_spec,
1386 Py_ssize_t start, Py_ssize_t end)
1387{
1388 return format_int_or_long(obj, format_spec, start, end,
1389 long_format);
1390}
1391
1392PyObject *
1393_PyFloat_FormatAdvanced(PyObject *obj,
1394 PyObject *format_spec,
1395 Py_ssize_t start, Py_ssize_t end)
1396{
1397 PyObject *result = NULL;
1398 InternalFormatSpec format;
1399
1400 /* check for the special case of zero length format spec, make
1401 it equivalent to str(obj) */
1402 if (start == end) {
1403 result = PyObject_Str(obj);
1404 goto done;
1405 }
1406
1407 /* parse the format_spec */
1408 if (!parse_internal_render_format_spec(format_spec, start, end,
1409 &format, '\0', '>'))
1410 goto done;
1411
1412 /* type conversion? */
1413 switch (format.type) {
1414 case '\0': /* No format code: like 'g', but with at least one decimal. */
1415 case 'e':
1416 case 'E':
1417 case 'f':
1418 case 'F':
1419 case 'g':
1420 case 'G':
1421 case 'n':
1422 case '%':
1423 /* no conversion, already a float. do the formatting */
1424 result = format_float_internal(obj, &format);
1425 break;
1426
1427 default:
1428 /* unknown */
1429 unknown_presentation_type(format.type, obj->ob_type->tp_name);
1430 goto done;
1431 }
1432
1433done:
1434 return result;
1435}
1436
1437PyObject *
1438_PyComplex_FormatAdvanced(PyObject *obj,
1439 PyObject *format_spec,
1440 Py_ssize_t start, Py_ssize_t end)
1441{
1442 PyObject *result = NULL;
1443 InternalFormatSpec format;
1444
1445 /* check for the special case of zero length format spec, make
1446 it equivalent to str(obj) */
1447 if (start == end) {
1448 result = PyObject_Str(obj);
1449 goto done;
1450 }
1451
1452 /* parse the format_spec */
1453 if (!parse_internal_render_format_spec(format_spec, start, end,
1454 &format, '\0', '>'))
1455 goto done;
1456
1457 /* type conversion? */
1458 switch (format.type) {
1459 case '\0': /* No format code: like 'g', but with at least one decimal. */
1460 case 'e':
1461 case 'E':
1462 case 'f':
1463 case 'F':
1464 case 'g':
1465 case 'G':
1466 case 'n':
1467 /* no conversion, already a complex. do the formatting */
1468 result = format_complex_internal(obj, &format);
1469 break;
1470
1471 default:
1472 /* unknown */
1473 unknown_presentation_type(format.type, obj->ob_type->tp_name);
1474 goto done;
1475 }
1476
1477done:
1478 return result;
1479}