blob: 9fb840b160dac3f36d03a387114212beb760df65 [file] [log] [blame]
/* implements the unicode (as opposed to string) version of the
   built-in formatters for string, int, float.  that is, the versions
   of int.__format__, etc., that take and return unicode objects */
4
5#include "Python.h"
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02006#include <locale.h>
7
8/* Raises an exception about an unknown presentation type for this
9 * type. */
10
11static void
12unknown_presentation_type(Py_UCS4 presentation_type,
13 const char* type_name)
14{
15 /* %c might be out-of-range, hence the two cases. */
16 if (presentation_type > 32 && presentation_type < 128)
17 PyErr_Format(PyExc_ValueError,
18 "Unknown format code '%c' "
19 "for object of type '%.200s'",
20 (char)presentation_type,
21 type_name);
22 else
23 PyErr_Format(PyExc_ValueError,
24 "Unknown format code '\\x%x' "
25 "for object of type '%.200s'",
26 (unsigned int)presentation_type,
27 type_name);
28}
29
30static void
31invalid_comma_type(Py_UCS4 presentation_type)
32{
33 if (presentation_type > 32 && presentation_type < 128)
34 PyErr_Format(PyExc_ValueError,
35 "Cannot specify ',' with '%c'.",
36 (char)presentation_type);
37 else
38 PyErr_Format(PyExc_ValueError,
39 "Cannot specify ',' with '\\x%x'.",
40 (unsigned int)presentation_type);
41}
42
43/*
44 get_integer consumes 0 or more decimal digit characters from an
45 input string, updates *result with the corresponding positive
46 integer, and returns the number of digits consumed.
47
48 returns -1 on error.
49*/
50static int
51get_integer(PyObject *str, Py_ssize_t *pos, Py_ssize_t end,
52 Py_ssize_t *result)
53{
54 Py_ssize_t accumulator, digitval, oldaccumulator;
55 int numdigits;
56 accumulator = numdigits = 0;
57 for (;;(*pos)++, numdigits++) {
58 if (*pos >= end)
59 break;
60 digitval = Py_UNICODE_TODECIMAL(PyUnicode_READ_CHAR(str, *pos));
61 if (digitval < 0)
62 break;
63 /*
64 This trick was copied from old Unicode format code. It's cute,
65 but would really suck on an old machine with a slow divide
66 implementation. Fortunately, in the normal case we do not
67 expect too many digits.
68 */
69 oldaccumulator = accumulator;
70 accumulator *= 10;
71 if ((accumulator+10)/10 != oldaccumulator+1) {
72 PyErr_Format(PyExc_ValueError,
73 "Too many decimal digits in format string");
74 return -1;
75 }
76 accumulator += digitval;
77 }
78 *result = accumulator;
79 return numdigits;
80}
81
82/************************************************************************/
83/*********** standard format specifier parsing **************************/
84/************************************************************************/
85
86/* returns true if this character is a specifier alignment token */
87Py_LOCAL_INLINE(int)
88is_alignment_token(Py_UCS4 c)
89{
90 switch (c) {
91 case '<': case '>': case '=': case '^':
92 return 1;
93 default:
94 return 0;
95 }
96}
97
98/* returns true if this character is a sign element */
99Py_LOCAL_INLINE(int)
100is_sign_element(Py_UCS4 c)
101{
102 switch (c) {
103 case ' ': case '+': case '-':
104 return 1;
105 default:
106 return 0;
107 }
108}
Eric Smith8c663262007-08-25 02:26:07 +0000109
Eric Smith4a7d76d2008-05-30 18:10:19 +0000110
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200111typedef struct {
112 Py_UCS4 fill_char;
113 Py_UCS4 align;
114 int alternate;
115 Py_UCS4 sign;
116 Py_ssize_t width;
117 int thousands_separators;
118 Py_ssize_t precision;
119 Py_UCS4 type;
120} InternalFormatSpec;
Eric Smith4a7d76d2008-05-30 18:10:19 +0000121
#if 0
/* Occasionally useful for debugging.  Should normally be compiled out.
   Dumps every field of *format to stdout, one per line, followed by a
   blank line. */
static void
DEBUG_PRINT_FORMAT_SPEC(InternalFormatSpec *format)
{
    printf("internal format spec: fill_char %d\n"
           "internal format spec: align %d\n"
           "internal format spec: alternate %d\n"
           "internal format spec: sign %d\n"
           "internal format spec: width %zd\n"
           "internal format spec: thousands_separators %d\n"
           "internal format spec: precision %zd\n"
           "internal format spec: type %c\n"
           "\n",
           format->fill_char,
           format->align,
           format->alternate,
           format->sign,
           format->width,
           format->thousands_separators,
           format->precision,
           format->type);
}
#endif
139
140
141/*
142 ptr points to the start of the format_spec, end points just past its end.
143 fills in format with the parsed information.
144 returns 1 on success, 0 on failure.
145 if failure, sets the exception
146*/
147static int
148parse_internal_render_format_spec(PyObject *format_spec,
149 Py_ssize_t start, Py_ssize_t end,
150 InternalFormatSpec *format,
151 char default_type,
152 char default_align)
153{
154 Py_ssize_t pos = start;
155 /* end-pos is used throughout this code to specify the length of
156 the input string */
157#define READ_spec(index) PyUnicode_READ_CHAR(format_spec, index)
158
159 Py_ssize_t consumed;
160 int align_specified = 0;
161
162 format->fill_char = '\0';
163 format->align = default_align;
164 format->alternate = 0;
165 format->sign = '\0';
166 format->width = -1;
167 format->thousands_separators = 0;
168 format->precision = -1;
169 format->type = default_type;
170
171 /* If the second char is an alignment token,
172 then parse the fill char */
173 if (end-pos >= 2 && is_alignment_token(READ_spec(pos+1))) {
174 format->align = READ_spec(pos+1);
175 format->fill_char = READ_spec(pos);
176 align_specified = 1;
177 pos += 2;
178 }
179 else if (end-pos >= 1 && is_alignment_token(READ_spec(pos))) {
180 format->align = READ_spec(pos);
181 align_specified = 1;
182 ++pos;
183 }
184
185 /* Parse the various sign options */
186 if (end-pos >= 1 && is_sign_element(READ_spec(pos))) {
187 format->sign = READ_spec(pos);
188 ++pos;
189 }
190
191 /* If the next character is #, we're in alternate mode. This only
192 applies to integers. */
193 if (end-pos >= 1 && READ_spec(pos) == '#') {
194 format->alternate = 1;
195 ++pos;
196 }
197
198 /* The special case for 0-padding (backwards compat) */
199 if (format->fill_char == '\0' && end-pos >= 1 && READ_spec(pos) == '0') {
200 format->fill_char = '0';
201 if (!align_specified) {
202 format->align = '=';
203 }
204 ++pos;
205 }
206
207 consumed = get_integer(format_spec, &pos, end, &format->width);
208 if (consumed == -1)
209 /* Overflow error. Exception already set. */
210 return 0;
211
212 /* If consumed is 0, we didn't consume any characters for the
213 width. In that case, reset the width to -1, because
214 get_integer() will have set it to zero. -1 is how we record
215 that the width wasn't specified. */
216 if (consumed == 0)
217 format->width = -1;
218
219 /* Comma signifies add thousands separators */
220 if (end-pos && READ_spec(pos) == ',') {
221 format->thousands_separators = 1;
222 ++pos;
223 }
224
225 /* Parse field precision */
226 if (end-pos && READ_spec(pos) == '.') {
227 ++pos;
228
229 consumed = get_integer(format_spec, &pos, end, &format->precision);
230 if (consumed == -1)
231 /* Overflow error. Exception already set. */
232 return 0;
233
234 /* Not having a precision after a dot is an error. */
235 if (consumed == 0) {
236 PyErr_Format(PyExc_ValueError,
237 "Format specifier missing precision");
238 return 0;
239 }
240
241 }
242
243 /* Finally, parse the type field. */
244
245 if (end-pos > 1) {
246 /* More than one char remain, invalid conversion spec. */
247 PyErr_Format(PyExc_ValueError, "Invalid conversion specification");
248 return 0;
249 }
250
251 if (end-pos == 1) {
252 format->type = READ_spec(pos);
253 ++pos;
254 }
255
256 /* Do as much validating as we can, just by looking at the format
257 specifier. Do not take into account what type of formatting
258 we're doing (int, float, string). */
259
260 if (format->thousands_separators) {
261 switch (format->type) {
262 case 'd':
263 case 'e':
264 case 'f':
265 case 'g':
266 case 'E':
267 case 'G':
268 case '%':
269 case 'F':
270 case '\0':
271 /* These are allowed. See PEP 378.*/
272 break;
273 default:
274 invalid_comma_type(format->type);
275 return 0;
276 }
277 }
278
279 if (format->fill_char > 127 || format->align > 127 ||
280 format->sign > 127) {
281 PyErr_SetString(PyExc_ValueError, "fill character too large");
282 return 0;
283 }
284
285 return 1;
286}
287
288/* Calculate the padding needed. */
289static void
290calc_padding(Py_ssize_t nchars, Py_ssize_t width, Py_UCS4 align,
291 Py_ssize_t *n_lpadding, Py_ssize_t *n_rpadding,
292 Py_ssize_t *n_total)
293{
294 if (width >= 0) {
295 if (nchars > width)
296 *n_total = nchars;
297 else
298 *n_total = width;
299 }
300 else {
301 /* not specified, use all of the chars and no more */
302 *n_total = nchars;
303 }
304
305 /* Figure out how much leading space we need, based on the
306 aligning */
307 if (align == '>')
308 *n_lpadding = *n_total - nchars;
309 else if (align == '^')
310 *n_lpadding = (*n_total - nchars) / 2;
311 else if (align == '<' || align == '=')
312 *n_lpadding = 0;
313 else {
314 /* We should never have an unspecified alignment. */
315 *n_lpadding = 0;
316 assert(0);
317 }
318
319 *n_rpadding = *n_total - nchars - *n_lpadding;
320}
321
322static void
323unicode_fill(PyObject *str, Py_ssize_t start, Py_ssize_t end, Py_UCS4 ch)
324{
325 int kind = PyUnicode_KIND(str);
326 void *data = PyUnicode_DATA(str);
327 while (start < end)
328 PyUnicode_WRITE(kind, data, start++, ch);
329}
330
331/* Do the padding, and return a pointer to where the caller-supplied
332 content goes. */
333static Py_ssize_t
334fill_padding(PyObject *s, Py_ssize_t start, Py_ssize_t nchars,
335 Py_UCS4 fill_char, Py_ssize_t n_lpadding,
336 Py_ssize_t n_rpadding)
337{
338 /* Pad on left. */
339 if (n_lpadding)
340 unicode_fill(s, start, start + n_lpadding, fill_char);
341
342 /* Pad on right. */
343 if (n_rpadding)
344 unicode_fill(s, start + nchars + n_lpadding,
345 start + nchars + n_lpadding + n_rpadding, fill_char);
346
347 /* Pointer to the user content. */
348 return start + n_lpadding;
349}
350
351/************************************************************************/
352/*********** common routines for numeric formatting *********************/
353/************************************************************************/
354
/* Locale type codes, as accepted by get_locale_info(). */
#define LT_CURRENT_LOCALE 0     /* whatever localeconv() reports */
#define LT_DEFAULT_LOCALE 1     /* hard-coded: "." decimal, "," every 3 */
#define LT_NO_LOCALE 2          /* "." decimal, no grouping at all */

/* Locale info needed for formatting integers and the part of floats
   before and including the decimal.  Note that locales only support
   8-bit chars, not unicode. */
typedef struct {
    char *decimal_point;        /* string written for the decimal point */
    char *thousands_sep;        /* string written between digit groups */
    char *grouping;             /* group size description, in the format
                                   of struct lconv's grouping member */
} LocaleInfo;
368
369/* describes the layout for an integer, see the comment in
370 calc_number_widths() for details */
371typedef struct {
372 Py_ssize_t n_lpadding;
373 Py_ssize_t n_prefix;
374 Py_ssize_t n_spadding;
375 Py_ssize_t n_rpadding;
376 char sign;
377 Py_ssize_t n_sign; /* number of digits needed for sign (0/1) */
378 Py_ssize_t n_grouped_digits; /* Space taken up by the digits, including
379 any grouping chars. */
380 Py_ssize_t n_decimal; /* 0 if only an integer */
381 Py_ssize_t n_remainder; /* Digits in decimal and/or exponent part,
382 excluding the decimal itself, if
383 present. */
384
385 /* These 2 are not the widths of fields, but are needed by
386 STRINGLIB_GROUPING. */
387 Py_ssize_t n_digits; /* The number of digits before a decimal
388 or exponent. */
389 Py_ssize_t n_min_width; /* The min_width we used when we computed
390 the n_grouped_digits width. */
391} NumberFieldWidths;
392
393
394/* Given a number of the form:
395 digits[remainder]
396 where ptr points to the start and end points to the end, find where
397 the integer part ends. This could be a decimal, an exponent, both,
398 or neither.
399 If a decimal point is present, set *has_decimal and increment
400 remainder beyond it.
401 Results are undefined (but shouldn't crash) for improperly
402 formatted strings.
403*/
404static void
405parse_number(PyObject *s, Py_ssize_t pos, Py_ssize_t end,
406 Py_ssize_t *n_remainder, int *has_decimal)
407{
408 Py_ssize_t remainder;
409
410 while (pos<end && isdigit(PyUnicode_READ_CHAR(s, pos)))
411 ++pos;
412 remainder = pos;
413
414 /* Does remainder start with a decimal point? */
415 *has_decimal = pos<end && PyUnicode_READ_CHAR(s, remainder) == '.';
416
417 /* Skip the decimal point. */
418 if (*has_decimal)
419 remainder++;
420
421 *n_remainder = end - remainder;
422}
423
424/* not all fields of format are used. for example, precision is
425 unused. should this take discrete params in order to be more clear
426 about what it does? or is passing a single format parameter easier
427 and more efficient enough to justify a little obfuscation? */
428static Py_ssize_t
429calc_number_widths(NumberFieldWidths *spec, Py_ssize_t n_prefix,
430 Py_UCS4 sign_char, PyObject *number, Py_ssize_t n_start,
431 Py_ssize_t n_end, Py_ssize_t n_remainder,
432 int has_decimal, const LocaleInfo *locale,
433 const InternalFormatSpec *format)
434{
435 Py_ssize_t n_non_digit_non_padding;
436 Py_ssize_t n_padding;
437
438 spec->n_digits = n_end - n_start - n_remainder - (has_decimal?1:0);
439 spec->n_lpadding = 0;
440 spec->n_prefix = n_prefix;
441 spec->n_decimal = has_decimal ? strlen(locale->decimal_point) : 0;
442 spec->n_remainder = n_remainder;
443 spec->n_spadding = 0;
444 spec->n_rpadding = 0;
445 spec->sign = '\0';
446 spec->n_sign = 0;
447
448 /* the output will look like:
449 | |
450 | <lpadding> <sign> <prefix> <spadding> <grouped_digits> <decimal> <remainder> <rpadding> |
451 | |
452
453 sign is computed from format->sign and the actual
454 sign of the number
455
456 prefix is given (it's for the '0x' prefix)
457
458 digits is already known
459
460 the total width is either given, or computed from the
461 actual digits
462
463 only one of lpadding, spadding, and rpadding can be non-zero,
464 and it's calculated from the width and other fields
465 */
466
467 /* compute the various parts we're going to write */
468 switch (format->sign) {
469 case '+':
470 /* always put a + or - */
471 spec->n_sign = 1;
472 spec->sign = (sign_char == '-' ? '-' : '+');
473 break;
474 case ' ':
475 spec->n_sign = 1;
476 spec->sign = (sign_char == '-' ? '-' : ' ');
477 break;
478 default:
479 /* Not specified, or the default (-) */
480 if (sign_char == '-') {
481 spec->n_sign = 1;
482 spec->sign = '-';
483 }
484 }
485
486 /* The number of chars used for non-digits and non-padding. */
487 n_non_digit_non_padding = spec->n_sign + spec->n_prefix + spec->n_decimal +
488 spec->n_remainder;
489
490 /* min_width can go negative, that's okay. format->width == -1 means
491 we don't care. */
492 if (format->fill_char == '0' && format->align == '=')
493 spec->n_min_width = format->width - n_non_digit_non_padding;
494 else
495 spec->n_min_width = 0;
496
497 if (spec->n_digits == 0)
498 /* This case only occurs when using 'c' formatting, we need
499 to special case it because the grouping code always wants
500 to have at least one character. */
501 spec->n_grouped_digits = 0;
502 else
503 spec->n_grouped_digits = _PyUnicode_InsertThousandsGrouping(
504 PyUnicode_1BYTE_KIND, NULL, 0, NULL,
505 spec->n_digits, spec->n_min_width,
506 locale->grouping, locale->thousands_sep);
507
508 /* Given the desired width and the total of digit and non-digit
509 space we consume, see if we need any padding. format->width can
510 be negative (meaning no padding), but this code still works in
511 that case. */
512 n_padding = format->width -
513 (n_non_digit_non_padding + spec->n_grouped_digits);
514 if (n_padding > 0) {
515 /* Some padding is needed. Determine if it's left, space, or right. */
516 switch (format->align) {
517 case '<':
518 spec->n_rpadding = n_padding;
519 break;
520 case '^':
521 spec->n_lpadding = n_padding / 2;
522 spec->n_rpadding = n_padding - spec->n_lpadding;
523 break;
524 case '=':
525 spec->n_spadding = n_padding;
526 break;
527 case '>':
528 spec->n_lpadding = n_padding;
529 break;
530 default:
531 /* Shouldn't get here, but treat it as '>' */
532 spec->n_lpadding = n_padding;
533 assert(0);
534 break;
535 }
536 }
537 return spec->n_lpadding + spec->n_sign + spec->n_prefix +
538 spec->n_spadding + spec->n_grouped_digits + spec->n_decimal +
539 spec->n_remainder + spec->n_rpadding;
540}
541
542/* Fill in the digit parts of a numbers's string representation,
543 as determined in calc_number_widths().
544 No error checking, since we know the buffer is the correct size. */
545static void
546fill_number(PyObject *out, Py_ssize_t pos, const NumberFieldWidths *spec,
547 PyObject *digits, Py_ssize_t d_start, Py_ssize_t d_end,
548 PyObject *prefix, Py_ssize_t p_start, Py_UCS4 fill_char,
549 LocaleInfo *locale, int toupper)
550{
551 /* Used to keep track of digits, decimal, and remainder. */
552 Py_ssize_t d_pos = d_start;
553 unsigned int kind = PyUnicode_KIND(out);
554 void *data = PyUnicode_DATA(out);
555
556#ifndef NDEBUG
557 Py_ssize_t r;
558#endif
559
560 if (spec->n_lpadding) {
561 unicode_fill(out, pos, pos + spec->n_lpadding, fill_char);
562 pos += spec->n_lpadding;
563 }
564 if (spec->n_sign == 1) {
565 PyUnicode_WRITE(kind, data, pos++, spec->sign);
566 }
567 if (spec->n_prefix) {
568 PyUnicode_CopyCharacters(out, pos, prefix, p_start, spec->n_prefix);
569 if (toupper) {
570 Py_ssize_t t;
571 /* XXX if the upper-case prefix is wider than the target
572 buffer, the caller should have allocated a wider string,
573 but currently doesn't. */
574 for (t = 0; t < spec->n_prefix; ++t)
575 PyUnicode_WRITE(kind, data, pos + t,
576 Py_UNICODE_TOUPPER(
577 PyUnicode_READ(kind, data, pos + t)));
578 }
579 pos += spec->n_prefix;
580 }
581 if (spec->n_spadding) {
582 unicode_fill(out, pos, pos + spec->n_spadding, fill_char);
583 pos += spec->n_spadding;
584 }
585
586 /* Only for type 'c' special case, it has no digits. */
587 if (spec->n_digits != 0) {
588 /* Fill the digits with InsertThousandsGrouping. */
589 char *pdigits = PyUnicode_DATA(digits);
590 if (PyUnicode_KIND(digits) < kind) {
591 pdigits = _PyUnicode_AsKind(digits, kind);
592 if (pdigits == NULL) {
593 /* XXX report exception */
594 Py_FatalError("out of memory");
595 return;
596 }
597 }
598#ifndef NDEBUG
599 r =
600#endif
601 _PyUnicode_InsertThousandsGrouping(
602 kind,
603 (char*)data + PyUnicode_KIND_SIZE(kind, pos),
604 spec->n_grouped_digits,
605 pdigits + PyUnicode_KIND_SIZE(kind, d_pos),
606 spec->n_digits, spec->n_min_width,
607 locale->grouping, locale->thousands_sep);
608#ifndef NDEBUG
609 assert(r == spec->n_grouped_digits);
610#endif
611 if (PyUnicode_KIND(digits) < kind)
612 PyMem_Free(pdigits);
613 d_pos += spec->n_digits;
614 }
615 if (toupper) {
616 Py_ssize_t t;
617 for (t = 0; t < spec->n_grouped_digits; ++t)
618 PyUnicode_WRITE(kind, data, pos + t,
619 Py_UNICODE_TOUPPER(
620 PyUnicode_READ(kind, data, pos + t)));
621 }
622 pos += spec->n_grouped_digits;
623
624 if (spec->n_decimal) {
625 Py_ssize_t t;
626 for (t = 0; t < spec->n_decimal; ++t)
627 PyUnicode_WRITE(kind, data, pos + t,
628 locale->decimal_point[t]);
629 pos += spec->n_decimal;
630 d_pos += 1;
631 }
632
633 if (spec->n_remainder) {
634 PyUnicode_CopyCharacters(out, pos, digits, d_pos, spec->n_remainder);
635 pos += spec->n_remainder;
636 d_pos += spec->n_remainder;
637 }
638
639 if (spec->n_rpadding) {
640 unicode_fill(out, pos, pos + spec->n_rpadding, fill_char);
641 pos += spec->n_rpadding;
642 }
643}
644
645static char no_grouping[1] = {CHAR_MAX};
646
647/* Find the decimal point character(s?), thousands_separator(s?), and
648 grouping description, either for the current locale if type is
649 LT_CURRENT_LOCALE, a hard-coded locale if LT_DEFAULT_LOCALE, or
650 none if LT_NO_LOCALE. */
651static void
652get_locale_info(int type, LocaleInfo *locale_info)
653{
654 switch (type) {
655 case LT_CURRENT_LOCALE: {
656 struct lconv *locale_data = localeconv();
657 locale_info->decimal_point = locale_data->decimal_point;
658 locale_info->thousands_sep = locale_data->thousands_sep;
659 locale_info->grouping = locale_data->grouping;
660 break;
661 }
662 case LT_DEFAULT_LOCALE:
663 locale_info->decimal_point = ".";
664 locale_info->thousands_sep = ",";
665 locale_info->grouping = "\3"; /* Group every 3 characters. The
666 (implicit) trailing 0 means repeat
667 infinitely. */
668 break;
669 case LT_NO_LOCALE:
670 locale_info->decimal_point = ".";
671 locale_info->thousands_sep = "";
672 locale_info->grouping = no_grouping;
673 break;
674 default:
675 assert(0);
676 }
677}
678
679/************************************************************************/
680/*********** string formatting ******************************************/
681/************************************************************************/
682
683static PyObject *
684format_string_internal(PyObject *value, const InternalFormatSpec *format)
685{
686 Py_ssize_t lpad;
687 Py_ssize_t rpad;
688 Py_ssize_t total;
689 Py_ssize_t pos;
690 Py_ssize_t len = PyUnicode_GET_SIZE(value);
691 PyObject *result = NULL;
692 int maxchar = 127;
693
694 /* sign is not allowed on strings */
695 if (format->sign != '\0') {
696 PyErr_SetString(PyExc_ValueError,
697 "Sign not allowed in string format specifier");
698 goto done;
699 }
700
701 /* alternate is not allowed on strings */
702 if (format->alternate) {
703 PyErr_SetString(PyExc_ValueError,
704 "Alternate form (#) not allowed in string format "
705 "specifier");
706 goto done;
707 }
708
709 /* '=' alignment not allowed on strings */
710 if (format->align == '=') {
711 PyErr_SetString(PyExc_ValueError,
712 "'=' alignment not allowed "
713 "in string format specifier");
714 goto done;
715 }
716
717 /* if precision is specified, output no more that format.precision
718 characters */
719 if (format->precision >= 0 && len >= format->precision) {
720 len = format->precision;
721 }
722
723 calc_padding(len, format->width, format->align, &lpad, &rpad, &total);
724
725 /* allocate the resulting string */
726 result = PyUnicode_New(total, maxchar);
727 if (result == NULL)
728 goto done;
729
730 /* Write into that space. First the padding. */
731 pos = fill_padding(result, 0, len,
732 format->fill_char=='\0'?' ':format->fill_char,
733 lpad, rpad);
734
735 /* Then the source string. */
736 PyUnicode_CopyCharacters(result, pos, value, 0, len);
737
738done:
739 return result;
740}
741
742
743/************************************************************************/
744/*********** long formatting ********************************************/
745/************************************************************************/
746
747typedef PyObject*
748(*IntOrLongToString)(PyObject *value, int base);
749
750static PyObject *
751format_int_or_long_internal(PyObject *value, const InternalFormatSpec *format,
752 IntOrLongToString tostring)
753{
754 PyObject *result = NULL;
755 int maxchar = 127;
756 PyObject *tmp = NULL;
757 Py_ssize_t inumeric_chars;
758 Py_UCS4 sign_char = '\0';
759 Py_ssize_t n_digits; /* count of digits need from the computed
760 string */
761 Py_ssize_t n_remainder = 0; /* Used only for 'c' formatting, which
762 produces non-digits */
763 Py_ssize_t n_prefix = 0; /* Count of prefix chars, (e.g., '0x') */
764 Py_ssize_t n_total;
765 Py_ssize_t prefix;
766 NumberFieldWidths spec;
767 long x;
768
769 /* Locale settings, either from the actual locale or
770 from a hard-code pseudo-locale */
771 LocaleInfo locale;
772
773 /* no precision allowed on integers */
774 if (format->precision != -1) {
775 PyErr_SetString(PyExc_ValueError,
776 "Precision not allowed in integer format specifier");
777 goto done;
778 }
779
780 /* special case for character formatting */
781 if (format->type == 'c') {
782 /* error to specify a sign */
783 if (format->sign != '\0') {
784 PyErr_SetString(PyExc_ValueError,
785 "Sign not allowed with integer"
786 " format specifier 'c'");
787 goto done;
788 }
789
790 /* taken from unicodeobject.c formatchar() */
791 /* Integer input truncated to a character */
792/* XXX: won't work for int */
793 x = PyLong_AsLong(value);
794 if (x == -1 && PyErr_Occurred())
795 goto done;
796 if (x < 0 || x > 0x10ffff) {
797 PyErr_SetString(PyExc_OverflowError,
798 "%c arg not in range(0x110000) "
799 "(wide Python build)");
800 goto done;
801 }
802 tmp = PyUnicode_FromOrdinal(x);
803 inumeric_chars = 0;
804 n_digits = 1;
805 if (x > maxchar)
806 maxchar = x;
807
808 /* As a sort-of hack, we tell calc_number_widths that we only
809 have "remainder" characters. calc_number_widths thinks
810 these are characters that don't get formatted, only copied
811 into the output string. We do this for 'c' formatting,
812 because the characters are likely to be non-digits. */
813 n_remainder = 1;
814 }
815 else {
816 int base;
817 int leading_chars_to_skip = 0; /* Number of characters added by
818 PyNumber_ToBase that we want to
819 skip over. */
820
821 /* Compute the base and how many characters will be added by
822 PyNumber_ToBase */
823 switch (format->type) {
824 case 'b':
825 base = 2;
826 leading_chars_to_skip = 2; /* 0b */
827 break;
828 case 'o':
829 base = 8;
830 leading_chars_to_skip = 2; /* 0o */
831 break;
832 case 'x':
833 case 'X':
834 base = 16;
835 leading_chars_to_skip = 2; /* 0x */
836 break;
837 default: /* shouldn't be needed, but stops a compiler warning */
838 case 'd':
839 case 'n':
840 base = 10;
841 break;
842 }
843
844 /* The number of prefix chars is the same as the leading
845 chars to skip */
846 if (format->alternate)
847 n_prefix = leading_chars_to_skip;
848
849 /* Do the hard part, converting to a string in a given base */
850 tmp = tostring(value, base);
851 if (tmp == NULL || PyUnicode_READY(tmp) == -1)
852 goto done;
853
854 inumeric_chars = 0;
855 n_digits = PyUnicode_GET_LENGTH(tmp);
856
857 prefix = inumeric_chars;
858
859 /* Is a sign character present in the output? If so, remember it
860 and skip it */
861 if (PyUnicode_READ_CHAR(tmp, inumeric_chars) == '-') {
862 sign_char = '-';
863 ++prefix;
864 ++leading_chars_to_skip;
865 }
866
867 /* Skip over the leading chars (0x, 0b, etc.) */
868 n_digits -= leading_chars_to_skip;
869 inumeric_chars += leading_chars_to_skip;
870 }
871
872 /* Determine the grouping, separator, and decimal point, if any. */
873 get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
874 (format->thousands_separators ?
875 LT_DEFAULT_LOCALE :
876 LT_NO_LOCALE),
877 &locale);
878
879 /* Calculate how much memory we'll need. */
880 n_total = calc_number_widths(&spec, n_prefix, sign_char, tmp, inumeric_chars,
881 inumeric_chars + n_digits, n_remainder, 0, &locale, format);
882
883 /* Allocate the memory. */
884 result = PyUnicode_New(n_total, maxchar);
885 if (!result)
886 goto done;
887
888 /* Populate the memory. */
889 fill_number(result, 0, &spec, tmp, inumeric_chars, inumeric_chars + n_digits,
890 tmp, prefix,
891 format->fill_char == '\0' ? ' ' : format->fill_char,
892 &locale, format->type == 'X');
893
894done:
895 Py_XDECREF(tmp);
896 return result;
897}
898
899/************************************************************************/
900/*********** float formatting *******************************************/
901/************************************************************************/
902
903static PyObject*
904strtounicode(char *charbuffer, Py_ssize_t len)
905{
906 return PyUnicode_FromKindAndData(PyUnicode_1BYTE_KIND, charbuffer, len);
907}
908
909/* much of this is taken from unicodeobject.c */
910static PyObject *
911format_float_internal(PyObject *value,
912 const InternalFormatSpec *format)
913{
914 char *buf = NULL; /* buffer returned from PyOS_double_to_string */
915 Py_ssize_t n_digits;
916 Py_ssize_t n_remainder;
917 Py_ssize_t n_total;
918 int has_decimal;
919 double val;
920 Py_ssize_t precision = format->precision;
921 Py_ssize_t default_precision = 6;
922 Py_UCS4 type = format->type;
923 int add_pct = 0;
924 Py_ssize_t index;
925 NumberFieldWidths spec;
926 int flags = 0;
927 PyObject *result = NULL;
928 int maxchar = 127;
929 Py_UCS4 sign_char = '\0';
930 int float_type; /* Used to see if we have a nan, inf, or regular float. */
931 PyObject *unicode_tmp = NULL;
932
933 /* Locale settings, either from the actual locale or
934 from a hard-code pseudo-locale */
935 LocaleInfo locale;
936
937 if (format->alternate)
938 flags |= Py_DTSF_ALT;
939
940 if (type == '\0') {
941 /* Omitted type specifier. Behaves in the same way as repr(x)
942 and str(x) if no precision is given, else like 'g', but with
943 at least one digit after the decimal point. */
944 flags |= Py_DTSF_ADD_DOT_0;
945 type = 'r';
946 default_precision = 0;
947 }
948
949 if (type == 'n')
950 /* 'n' is the same as 'g', except for the locale used to
951 format the result. We take care of that later. */
952 type = 'g';
953
954 val = PyFloat_AsDouble(value);
955 if (val == -1.0 && PyErr_Occurred())
956 goto done;
957
958 if (type == '%') {
959 type = 'f';
960 val *= 100;
961 add_pct = 1;
962 }
963
964 if (precision < 0)
965 precision = default_precision;
966 else if (type == 'r')
967 type = 'g';
968
969 /* Cast "type", because if we're in unicode we need to pass a
970 8-bit char. This is safe, because we've restricted what "type"
971 can be. */
972 buf = PyOS_double_to_string(val, (char)type, precision, flags,
973 &float_type);
974 if (buf == NULL)
975 goto done;
976 n_digits = strlen(buf);
977
978 if (add_pct) {
979 /* We know that buf has a trailing zero (since we just called
980 strlen() on it), and we don't use that fact any more. So we
981 can just write over the trailing zero. */
982 buf[n_digits] = '%';
983 n_digits += 1;
984 }
985
986 /* Since there is no unicode version of PyOS_double_to_string,
987 just use the 8 bit version and then convert to unicode. */
988 unicode_tmp = strtounicode(buf, n_digits);
989 if (unicode_tmp == NULL)
990 goto done;
991 index = 0;
992
993 /* Is a sign character present in the output? If so, remember it
994 and skip it */
995 if (PyUnicode_READ_CHAR(unicode_tmp, index) == '-') {
996 sign_char = '-';
997 ++index;
998 --n_digits;
999 }
1000
1001 /* Determine if we have any "remainder" (after the digits, might include
1002 decimal or exponent or both (or neither)) */
1003 parse_number(unicode_tmp, index, index + n_digits, &n_remainder, &has_decimal);
1004
1005 /* Determine the grouping, separator, and decimal point, if any. */
1006 get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
1007 (format->thousands_separators ?
1008 LT_DEFAULT_LOCALE :
1009 LT_NO_LOCALE),
1010 &locale);
1011
1012 /* Calculate how much memory we'll need. */
1013 n_total = calc_number_widths(&spec, 0, sign_char, unicode_tmp, index,
1014 index + n_digits, n_remainder, has_decimal,
1015 &locale, format);
1016
1017 /* Allocate the memory. */
1018 result = PyUnicode_New(n_total, maxchar);
1019 if (result == NULL)
1020 goto done;
1021
1022 /* Populate the memory. */
1023 fill_number(result, 0, &spec, unicode_tmp, index, index + n_digits,
1024 NULL, 0,
1025 format->fill_char == '\0' ? ' ' : format->fill_char, &locale,
1026 0);
1027
1028done:
1029 PyMem_Free(buf);
1030 Py_DECREF(unicode_tmp);
1031 return result;
1032}
1033
1034/************************************************************************/
1035/*********** complex formatting *****************************************/
1036/************************************************************************/
1037
1038static PyObject *
1039format_complex_internal(PyObject *value,
1040 const InternalFormatSpec *format)
1041{
1042 double re;
1043 double im;
1044 char *re_buf = NULL; /* buffer returned from PyOS_double_to_string */
1045 char *im_buf = NULL; /* buffer returned from PyOS_double_to_string */
1046
1047 InternalFormatSpec tmp_format = *format;
1048 Py_ssize_t n_re_digits;
1049 Py_ssize_t n_im_digits;
1050 Py_ssize_t n_re_remainder;
1051 Py_ssize_t n_im_remainder;
1052 Py_ssize_t n_re_total;
1053 Py_ssize_t n_im_total;
1054 int re_has_decimal;
1055 int im_has_decimal;
1056 Py_ssize_t precision = format->precision;
1057 Py_ssize_t default_precision = 6;
1058 Py_UCS4 type = format->type;
1059 Py_ssize_t i_re;
1060 Py_ssize_t i_im;
1061 NumberFieldWidths re_spec;
1062 NumberFieldWidths im_spec;
1063 int flags = 0;
1064 PyObject *result = NULL;
1065 int maxchar = 127;
1066 int rkind;
1067 void *rdata;
1068 Py_ssize_t index;
1069 Py_UCS4 re_sign_char = '\0';
1070 Py_UCS4 im_sign_char = '\0';
1071 int re_float_type; /* Used to see if we have a nan, inf, or regular float. */
1072 int im_float_type;
1073 int add_parens = 0;
1074 int skip_re = 0;
1075 Py_ssize_t lpad;
1076 Py_ssize_t rpad;
1077 Py_ssize_t total;
1078 PyObject *re_unicode_tmp = NULL;
1079 PyObject *im_unicode_tmp = NULL;
1080
1081 /* Locale settings, either from the actual locale or
1082 from a hard-code pseudo-locale */
1083 LocaleInfo locale;
1084
1085 /* Zero padding is not allowed. */
1086 if (format->fill_char == '0') {
1087 PyErr_SetString(PyExc_ValueError,
1088 "Zero padding is not allowed in complex format "
1089 "specifier");
1090 goto done;
1091 }
1092
1093 /* Neither is '=' alignment . */
1094 if (format->align == '=') {
1095 PyErr_SetString(PyExc_ValueError,
1096 "'=' alignment flag is not allowed in complex format "
1097 "specifier");
1098 goto done;
1099 }
1100
1101 re = PyComplex_RealAsDouble(value);
1102 if (re == -1.0 && PyErr_Occurred())
1103 goto done;
1104 im = PyComplex_ImagAsDouble(value);
1105 if (im == -1.0 && PyErr_Occurred())
1106 goto done;
1107
1108 if (format->alternate)
1109 flags |= Py_DTSF_ALT;
1110
1111 if (type == '\0') {
1112 /* Omitted type specifier. Should be like str(self). */
1113 type = 'r';
1114 default_precision = 0;
1115 if (re == 0.0 && copysign(1.0, re) == 1.0)
1116 skip_re = 1;
1117 else
1118 add_parens = 1;
1119 }
1120
1121 if (type == 'n')
1122 /* 'n' is the same as 'g', except for the locale used to
1123 format the result. We take care of that later. */
1124 type = 'g';
1125
1126 if (precision < 0)
1127 precision = default_precision;
1128 else if (type == 'r')
1129 type = 'g';
1130
1131 /* Cast "type", because if we're in unicode we need to pass a
1132 8-bit char. This is safe, because we've restricted what "type"
1133 can be. */
1134 re_buf = PyOS_double_to_string(re, (char)type, precision, flags,
1135 &re_float_type);
1136 if (re_buf == NULL)
1137 goto done;
1138 im_buf = PyOS_double_to_string(im, (char)type, precision, flags,
1139 &im_float_type);
1140 if (im_buf == NULL)
1141 goto done;
1142
1143 n_re_digits = strlen(re_buf);
1144 n_im_digits = strlen(im_buf);
1145
1146 /* Since there is no unicode version of PyOS_double_to_string,
1147 just use the 8 bit version and then convert to unicode. */
1148 re_unicode_tmp = strtounicode(re_buf, n_re_digits);
1149 if (re_unicode_tmp == NULL)
1150 goto done;
1151 i_re = 0;
1152
1153 im_unicode_tmp = strtounicode(im_buf, n_im_digits);
1154 if (im_unicode_tmp == NULL)
1155 goto done;
1156 i_im = 0;
1157
1158 /* Is a sign character present in the output? If so, remember it
1159 and skip it */
1160 if (PyUnicode_READ_CHAR(re_unicode_tmp, i_re) == '-') {
1161 re_sign_char = '-';
1162 ++i_re;
1163 --n_re_digits;
1164 }
1165 if (PyUnicode_READ_CHAR(im_unicode_tmp, i_im) == '-') {
1166 im_sign_char = '-';
1167 ++i_im;
1168 --n_im_digits;
1169 }
1170
1171 /* Determine if we have any "remainder" (after the digits, might include
1172 decimal or exponent or both (or neither)) */
1173 parse_number(re_unicode_tmp, i_re, i_re + n_re_digits,
1174 &n_re_remainder, &re_has_decimal);
1175 parse_number(im_unicode_tmp, i_im, i_im + n_im_digits,
1176 &n_im_remainder, &im_has_decimal);
1177
1178 /* Determine the grouping, separator, and decimal point, if any. */
1179 get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
1180 (format->thousands_separators ?
1181 LT_DEFAULT_LOCALE :
1182 LT_NO_LOCALE),
1183 &locale);
1184
1185 /* Turn off any padding. We'll do it later after we've composed
1186 the numbers without padding. */
1187 tmp_format.fill_char = '\0';
1188 tmp_format.align = '<';
1189 tmp_format.width = -1;
1190
1191 /* Calculate how much memory we'll need. */
1192 n_re_total = calc_number_widths(&re_spec, 0, re_sign_char, re_unicode_tmp,
1193 i_re, i_re + n_re_digits, n_re_remainder,
1194 re_has_decimal, &locale, &tmp_format);
1195
1196 /* Same formatting, but always include a sign, unless the real part is
1197 * going to be omitted, in which case we use whatever sign convention was
1198 * requested by the original format. */
1199 if (!skip_re)
1200 tmp_format.sign = '+';
1201 n_im_total = calc_number_widths(&im_spec, 0, im_sign_char, im_unicode_tmp,
1202 i_im, i_im + n_im_digits, n_im_remainder,
1203 im_has_decimal, &locale, &tmp_format);
1204
1205 if (skip_re)
1206 n_re_total = 0;
1207
1208 /* Add 1 for the 'j', and optionally 2 for parens. */
1209 calc_padding(n_re_total + n_im_total + 1 + add_parens * 2,
1210 format->width, format->align, &lpad, &rpad, &total);
1211
1212 result = PyUnicode_New(total, maxchar);
1213 if (result == NULL)
1214 goto done;
1215 rkind = PyUnicode_KIND(result);
1216 rdata = PyUnicode_DATA(result);
1217
1218 /* Populate the memory. First, the padding. */
1219 index = fill_padding(result, 0,
1220 n_re_total + n_im_total + 1 + add_parens * 2,
1221 format->fill_char=='\0' ? ' ' : format->fill_char,
1222 lpad, rpad);
1223
1224 if (add_parens)
1225 PyUnicode_WRITE(rkind, rdata, index++, '(');
1226
1227 if (!skip_re) {
1228 fill_number(result, index, &re_spec, re_unicode_tmp,
1229 i_re, i_re + n_re_digits, NULL, 0, 0, &locale, 0);
1230 index += n_re_total;
1231 }
1232 fill_number(result, index, &im_spec, im_unicode_tmp,
1233 i_im, i_im + n_im_digits, NULL, 0, 0, &locale, 0);
1234 index += n_im_total;
1235 PyUnicode_WRITE(rkind, rdata, index++, 'j');
1236
1237 if (add_parens)
1238 PyUnicode_WRITE(rkind, rdata, index++, ')');
1239
1240done:
1241 PyMem_Free(re_buf);
1242 PyMem_Free(im_buf);
1243 Py_XDECREF(re_unicode_tmp);
1244 Py_XDECREF(im_unicode_tmp);
1245 return result;
1246}
1247
1248/************************************************************************/
1249/*********** built in formatters ****************************************/
1250/************************************************************************/
1251PyObject *
1252_PyUnicode_FormatAdvanced(PyObject *obj,
1253 PyObject *format_spec,
1254 Py_ssize_t start, Py_ssize_t end)
1255{
1256 InternalFormatSpec format;
1257 PyObject *result = NULL;
1258
1259 /* check for the special case of zero length format spec, make
1260 it equivalent to str(obj) */
1261 if (start == end) {
1262 result = PyObject_Str(obj);
1263 goto done;
1264 }
1265
1266 /* parse the format_spec */
1267 if (!parse_internal_render_format_spec(format_spec, start, end,
1268 &format, 's', '<'))
1269 goto done;
1270
1271 /* type conversion? */
1272 switch (format.type) {
1273 case 's':
1274 /* no type conversion needed, already a string. do the formatting */
1275 result = format_string_internal(obj, &format);
1276 break;
1277 default:
1278 /* unknown */
1279 unknown_presentation_type(format.type, obj->ob_type->tp_name);
1280 goto done;
1281 }
1282
1283done:
1284 return result;
1285}
1286
1287static PyObject*
1288format_int_or_long(PyObject* obj, PyObject* format_spec,
1289 Py_ssize_t start, Py_ssize_t end,
1290 IntOrLongToString tostring)
1291{
1292 PyObject *result = NULL;
1293 PyObject *tmp = NULL;
1294 InternalFormatSpec format;
1295
1296 /* check for the special case of zero length format spec, make
1297 it equivalent to str(obj) */
1298 if (start == end) {
1299 result = PyObject_Str(obj);
1300 goto done;
1301 }
1302
1303 /* parse the format_spec */
1304 if (!parse_internal_render_format_spec(format_spec, start, end,
1305 &format, 'd', '>'))
1306 goto done;
1307
1308 /* type conversion? */
1309 switch (format.type) {
1310 case 'b':
1311 case 'c':
1312 case 'd':
1313 case 'o':
1314 case 'x':
1315 case 'X':
1316 case 'n':
1317 /* no type conversion needed, already an int (or long). do
1318 the formatting */
1319 result = format_int_or_long_internal(obj, &format, tostring);
1320 break;
1321
1322 case 'e':
1323 case 'E':
1324 case 'f':
1325 case 'F':
1326 case 'g':
1327 case 'G':
1328 case '%':
1329 /* convert to float */
1330 tmp = PyNumber_Float(obj);
1331 if (tmp == NULL)
1332 goto done;
1333 result = format_float_internal(tmp, &format);
1334 break;
1335
1336 default:
1337 /* unknown */
1338 unknown_presentation_type(format.type, obj->ob_type->tp_name);
1339 goto done;
1340 }
1341
1342done:
1343 Py_XDECREF(tmp);
1344 return result;
1345}
1346
1347/* Need to define long_format as a function that will convert a long
1348 to a string. In 3.0, _PyLong_Format has the correct signature. */
1349#define long_format _PyLong_Format
1350
1351PyObject *
1352_PyLong_FormatAdvanced(PyObject *obj,
1353 PyObject *format_spec,
1354 Py_ssize_t start, Py_ssize_t end)
1355{
1356 return format_int_or_long(obj, format_spec, start, end,
1357 long_format);
1358}
1359
1360PyObject *
1361_PyFloat_FormatAdvanced(PyObject *obj,
1362 PyObject *format_spec,
1363 Py_ssize_t start, Py_ssize_t end)
1364{
1365 PyObject *result = NULL;
1366 InternalFormatSpec format;
1367
1368 /* check for the special case of zero length format spec, make
1369 it equivalent to str(obj) */
1370 if (start == end) {
1371 result = PyObject_Str(obj);
1372 goto done;
1373 }
1374
1375 /* parse the format_spec */
1376 if (!parse_internal_render_format_spec(format_spec, start, end,
1377 &format, '\0', '>'))
1378 goto done;
1379
1380 /* type conversion? */
1381 switch (format.type) {
1382 case '\0': /* No format code: like 'g', but with at least one decimal. */
1383 case 'e':
1384 case 'E':
1385 case 'f':
1386 case 'F':
1387 case 'g':
1388 case 'G':
1389 case 'n':
1390 case '%':
1391 /* no conversion, already a float. do the formatting */
1392 result = format_float_internal(obj, &format);
1393 break;
1394
1395 default:
1396 /* unknown */
1397 unknown_presentation_type(format.type, obj->ob_type->tp_name);
1398 goto done;
1399 }
1400
1401done:
1402 return result;
1403}
1404
1405PyObject *
1406_PyComplex_FormatAdvanced(PyObject *obj,
1407 PyObject *format_spec,
1408 Py_ssize_t start, Py_ssize_t end)
1409{
1410 PyObject *result = NULL;
1411 InternalFormatSpec format;
1412
1413 /* check for the special case of zero length format spec, make
1414 it equivalent to str(obj) */
1415 if (start == end) {
1416 result = PyObject_Str(obj);
1417 goto done;
1418 }
1419
1420 /* parse the format_spec */
1421 if (!parse_internal_render_format_spec(format_spec, start, end,
1422 &format, '\0', '>'))
1423 goto done;
1424
1425 /* type conversion? */
1426 switch (format.type) {
1427 case '\0': /* No format code: like 'g', but with at least one decimal. */
1428 case 'e':
1429 case 'E':
1430 case 'f':
1431 case 'F':
1432 case 'g':
1433 case 'G':
1434 case 'n':
1435 /* no conversion, already a complex. do the formatting */
1436 result = format_complex_internal(obj, &format);
1437 break;
1438
1439 default:
1440 /* unknown */
1441 unknown_presentation_type(format.type, obj->ob_type->tp_name);
1442 goto done;
1443 }
1444
1445done:
1446 return result;
1447}