blob: 22dd292ce9ea41313ce19d802b6139473cd4ee4b [file] [log] [blame]
/* implements the string, int, long, and float formatters.  that is,
   string.__format__, etc. */

/* Before including this, you must include either:
   stringlib/unicodedefs.h
   stringlib/stringdefs.h

   Also, you should define whichever of the names:
   FORMAT_STRING
   FORMAT_LONG
   FORMAT_INT
   FORMAT_FLOAT
   you need, to be whatever you want the public names of these
   functions to be.  These are the only non-static functions defined
   here.
*/
15
16#define ALLOW_PARENS_FOR_SIGN 0
17
18/*
19 get_integer consumes 0 or more decimal digit characters from an
20 input string, updates *result with the corresponding positive
21 integer, and returns the number of digits consumed.
22
23 returns -1 on error.
24*/
25static int
26get_integer(STRINGLIB_CHAR **ptr, STRINGLIB_CHAR *end,
27 Py_ssize_t *result)
28{
29 Py_ssize_t accumulator, digitval, oldaccumulator;
30 int numdigits;
31 accumulator = numdigits = 0;
32 for (;;(*ptr)++, numdigits++) {
33 if (*ptr >= end)
34 break;
35 digitval = STRINGLIB_TODECIMAL(**ptr);
36 if (digitval < 0)
37 break;
38 /*
39 This trick was copied from old Unicode format code. It's cute,
40 but would really suck on an old machine with a slow divide
41 implementation. Fortunately, in the normal case we do not
42 expect too many digits.
43 */
44 oldaccumulator = accumulator;
45 accumulator *= 10;
46 if ((accumulator+10)/10 != oldaccumulator+1) {
47 PyErr_Format(PyExc_ValueError,
48 "Too many decimal digits in format string");
49 return -1;
50 }
51 accumulator += digitval;
52 }
53 *result = accumulator;
54 return numdigits;
55}
56
57/************************************************************************/
58/*********** standard format specifier parsing **************************/
59/************************************************************************/
60
61/* returns true if this character is a specifier alignment token */
62Py_LOCAL_INLINE(int)
63is_alignment_token(STRINGLIB_CHAR c)
64{
65 switch (c) {
66 case '<': case '>': case '=': case '^':
67 return 1;
68 default:
69 return 0;
70 }
71}
72
73/* returns true if this character is a sign element */
74Py_LOCAL_INLINE(int)
75is_sign_element(STRINGLIB_CHAR c)
76{
77 switch (c) {
78 case ' ': case '+': case '-':
79#if ALLOW_PARENS_FOR_SIGN
80 case '(':
81#endif
82 return 1;
83 default:
84 return 0;
85 }
86}
87
88
89typedef struct {
90 STRINGLIB_CHAR fill_char;
91 STRINGLIB_CHAR align;
92 STRINGLIB_CHAR sign;
93 Py_ssize_t width;
94 Py_ssize_t precision;
95 STRINGLIB_CHAR type;
96} InternalFormatSpec;
97
98/*
99 ptr points to the start of the format_spec, end points just past its end.
100 fills in format with the parsed information.
101 returns 1 on success, 0 on failure.
102 if failure, sets the exception
103*/
104static int
105parse_internal_render_format_spec(PyObject *format_spec,
106 InternalFormatSpec *format,
107 char default_type)
108{
109 STRINGLIB_CHAR *ptr = STRINGLIB_STR(format_spec);
110 STRINGLIB_CHAR *end = ptr + STRINGLIB_LEN(format_spec);
111
112 /* end-ptr is used throughout this code to specify the length of
113 the input string */
114
115 Py_ssize_t specified_width;
116
117 format->fill_char = '\0';
118 format->align = '\0';
119 format->sign = '\0';
120 format->width = -1;
121 format->precision = -1;
122 format->type = default_type;
123
124 /* If the second char is an alignment token,
125 then parse the fill char */
126 if (end-ptr >= 2 && is_alignment_token(ptr[1])) {
127 format->align = ptr[1];
128 format->fill_char = ptr[0];
129 ptr += 2;
130 }
131 else if (end-ptr >= 1 && is_alignment_token(ptr[0])) {
132 format->align = ptr[0];
Eric Smith8a803dd2008-02-20 23:39:28 +0000133 ++ptr;
Eric Smitha9f7d622008-02-17 19:46:49 +0000134 }
135
136 /* Parse the various sign options */
137 if (end-ptr >= 1 && is_sign_element(ptr[0])) {
138 format->sign = ptr[0];
Eric Smith8a803dd2008-02-20 23:39:28 +0000139 ++ptr;
Eric Smitha9f7d622008-02-17 19:46:49 +0000140#if ALLOW_PARENS_FOR_SIGN
141 if (end-ptr >= 1 && ptr[0] == ')') {
Eric Smith8a803dd2008-02-20 23:39:28 +0000142 ++ptr;
Eric Smitha9f7d622008-02-17 19:46:49 +0000143 }
144#endif
145 }
146
147 /* The special case for 0-padding (backwards compat) */
148 if (format->fill_char == '\0' && end-ptr >= 1 && ptr[0] == '0') {
149 format->fill_char = '0';
150 if (format->align == '\0') {
151 format->align = '=';
152 }
Eric Smith8a803dd2008-02-20 23:39:28 +0000153 ++ptr;
Eric Smitha9f7d622008-02-17 19:46:49 +0000154 }
155
156 /* XXX add error checking */
157 specified_width = get_integer(&ptr, end, &format->width);
158
159 /* if specified_width is 0, we didn't consume any characters for
160 the width. in that case, reset the width to -1, because
161 get_integer() will have set it to zero */
162 if (specified_width == 0) {
163 format->width = -1;
164 }
165
166 /* Parse field precision */
167 if (end-ptr && ptr[0] == '.') {
Eric Smith8a803dd2008-02-20 23:39:28 +0000168 ++ptr;
Eric Smitha9f7d622008-02-17 19:46:49 +0000169
170 /* XXX add error checking */
171 specified_width = get_integer(&ptr, end, &format->precision);
172
173 /* not having a precision after a dot is an error */
174 if (specified_width == 0) {
175 PyErr_Format(PyExc_ValueError,
176 "Format specifier missing precision");
177 return 0;
178 }
179
180 }
181
182 /* Finally, parse the type field */
183
184 if (end-ptr > 1) {
185 /* invalid conversion spec */
186 PyErr_Format(PyExc_ValueError, "Invalid conversion specification");
187 return 0;
188 }
189
190 if (end-ptr == 1) {
191 format->type = ptr[0];
Eric Smith8a803dd2008-02-20 23:39:28 +0000192 ++ptr;
Eric Smitha9f7d622008-02-17 19:46:49 +0000193 }
194
195 return 1;
196}
197
198#if defined FORMAT_FLOAT || defined FORMAT_LONG
199/************************************************************************/
200/*********** common routines for numeric formatting *********************/
201/************************************************************************/
202
203/* describes the layout for an integer, see the comment in
204 _calc_integer_widths() for details */
205typedef struct {
206 Py_ssize_t n_lpadding;
207 Py_ssize_t n_spadding;
208 Py_ssize_t n_rpadding;
209 char lsign;
210 Py_ssize_t n_lsign;
211 char rsign;
212 Py_ssize_t n_rsign;
213 Py_ssize_t n_total; /* just a convenience, it's derivable from the
214 other fields */
215} NumberFieldWidths;
216
217/* not all fields of format are used. for example, precision is
218 unused. should this take discrete params in order to be more clear
219 about what it does? or is passing a single format parameter easier
220 and more efficient enough to justify a little obfuscation? */
221static void
222calc_number_widths(NumberFieldWidths *r, STRINGLIB_CHAR actual_sign,
223 Py_ssize_t n_digits, const InternalFormatSpec *format)
224{
225 r->n_lpadding = 0;
226 r->n_spadding = 0;
227 r->n_rpadding = 0;
228 r->lsign = '\0';
229 r->n_lsign = 0;
230 r->rsign = '\0';
231 r->n_rsign = 0;
232
233 /* the output will look like:
234 | |
235 | <lpadding> <lsign> <spadding> <digits> <rsign> <rpadding> |
236 | |
237
238 lsign and rsign are computed from format->sign and the actual
239 sign of the number
240
241 digits is already known
242
243 the total width is either given, or computed from the
244 actual digits
245
246 only one of lpadding, spadding, and rpadding can be non-zero,
247 and it's calculated from the width and other fields
248 */
249
250 /* compute the various parts we're going to write */
251 if (format->sign == '+') {
252 /* always put a + or - */
253 r->n_lsign = 1;
254 r->lsign = (actual_sign == '-' ? '-' : '+');
255 }
256#if ALLOW_PARENS_FOR_SIGN
257 else if (format->sign == '(') {
258 if (actual_sign == '-') {
259 r->n_lsign = 1;
260 r->lsign = '(';
261 r->n_rsign = 1;
262 r->rsign = ')';
263 }
264 }
265#endif
266 else if (format->sign == ' ') {
267 r->n_lsign = 1;
268 r->lsign = (actual_sign == '-' ? '-' : ' ');
269 }
270 else {
271 /* non specified, or the default (-) */
272 if (actual_sign == '-') {
273 r->n_lsign = 1;
274 r->lsign = '-';
275 }
276 }
277
278 /* now the number of padding characters */
279 if (format->width == -1) {
280 /* no padding at all, nothing to do */
281 }
282 else {
283 /* see if any padding is needed */
284 if (r->n_lsign + n_digits + r->n_rsign >= format->width) {
285 /* no padding needed, we're already bigger than the
286 requested width */
287 }
288 else {
289 /* determine which of left, space, or right padding is
290 needed */
291 Py_ssize_t padding = format->width -
292 (r->n_lsign + n_digits + r->n_rsign);
293 if (format->align == '<')
294 r->n_rpadding = padding;
295 else if (format->align == '>')
296 r->n_lpadding = padding;
297 else if (format->align == '^') {
298 r->n_lpadding = padding / 2;
299 r->n_rpadding = padding - r->n_lpadding;
300 }
301 else if (format->align == '=')
302 r->n_spadding = padding;
303 else
304 r->n_lpadding = padding;
305 }
306 }
307 r->n_total = r->n_lpadding + r->n_lsign + r->n_spadding +
308 n_digits + r->n_rsign + r->n_rpadding;
309}
310
311/* fill in the non-digit parts of a numbers's string representation,
312 as determined in _calc_integer_widths(). returns the pointer to
313 where the digits go. */
314static STRINGLIB_CHAR *
315fill_number(STRINGLIB_CHAR *p_buf, const NumberFieldWidths *spec,
316 Py_ssize_t n_digits, STRINGLIB_CHAR fill_char)
317{
318 STRINGLIB_CHAR* p_digits;
319
320 if (spec->n_lpadding) {
321 STRINGLIB_FILL(p_buf, fill_char, spec->n_lpadding);
322 p_buf += spec->n_lpadding;
323 }
324 if (spec->n_lsign == 1) {
325 *p_buf++ = spec->lsign;
326 }
327 if (spec->n_spadding) {
328 STRINGLIB_FILL(p_buf, fill_char, spec->n_spadding);
329 p_buf += spec->n_spadding;
330 }
331 p_digits = p_buf;
332 p_buf += n_digits;
333 if (spec->n_rsign == 1) {
334 *p_buf++ = spec->rsign;
335 }
336 if (spec->n_rpadding) {
337 STRINGLIB_FILL(p_buf, fill_char, spec->n_rpadding);
338 p_buf += spec->n_rpadding;
339 }
340 return p_digits;
341}
342#endif /* FORMAT_FLOAT || FORMAT_LONG */
343
344/************************************************************************/
345/*********** string formatting ******************************************/
346/************************************************************************/
347
348static PyObject *
349format_string_internal(PyObject *value, const InternalFormatSpec *format)
350{
351 Py_ssize_t width; /* total field width */
352 Py_ssize_t lpad;
353 STRINGLIB_CHAR *dst;
354 STRINGLIB_CHAR *src = STRINGLIB_STR(value);
355 Py_ssize_t len = STRINGLIB_LEN(value);
356 PyObject *result = NULL;
357
358 /* sign is not allowed on strings */
359 if (format->sign != '\0') {
360 PyErr_SetString(PyExc_ValueError,
361 "Sign not allowed in string format specifier");
362 goto done;
363 }
364
365 /* '=' alignment not allowed on strings */
366 if (format->align == '=') {
367 PyErr_SetString(PyExc_ValueError,
368 "'=' alignment not allowed "
369 "in string format specifier");
370 goto done;
371 }
372
373 /* if precision is specified, output no more that format.precision
374 characters */
375 if (format->precision >= 0 && len >= format->precision) {
376 len = format->precision;
377 }
378
379 if (format->width >= 0) {
380 width = format->width;
381
382 /* but use at least len characters */
383 if (len > width) {
384 width = len;
385 }
386 }
387 else {
388 /* not specified, use all of the chars and no more */
389 width = len;
390 }
391
392 /* allocate the resulting string */
393 result = STRINGLIB_NEW(NULL, width);
394 if (result == NULL)
395 goto done;
396
397 /* now write into that space */
398 dst = STRINGLIB_STR(result);
399
400 /* figure out how much leading space we need, based on the
401 aligning */
402 if (format->align == '>')
403 lpad = width - len;
404 else if (format->align == '^')
405 lpad = (width - len) / 2;
406 else
407 lpad = 0;
408
409 /* if right aligning, increment the destination allow space on the
410 left */
411 memcpy(dst + lpad, src, len * sizeof(STRINGLIB_CHAR));
412
413 /* do any padding */
414 if (width > len) {
415 STRINGLIB_CHAR fill_char = format->fill_char;
416 if (fill_char == '\0') {
417 /* use the default, if not specified */
418 fill_char = ' ';
419 }
420
421 /* pad on left */
422 if (lpad)
423 STRINGLIB_FILL(dst, fill_char, lpad);
424
425 /* pad on right */
426 if (width - len - lpad)
427 STRINGLIB_FILL(dst + len + lpad, fill_char, width - len - lpad);
428 }
429
430done:
431 return result;
432}
433
434
435/************************************************************************/
436/*********** long formatting ********************************************/
437/************************************************************************/
438
439#if defined FORMAT_LONG || defined FORMAT_INT
440typedef PyObject*
441(*IntOrLongToString)(PyObject *value, int base);
442
443static PyObject *
444format_int_or_long_internal(PyObject *value, const InternalFormatSpec *format,
445 IntOrLongToString tostring)
446{
447 PyObject *result = NULL;
448 PyObject *tmp = NULL;
449 STRINGLIB_CHAR *pnumeric_chars;
450 STRINGLIB_CHAR numeric_char;
451 STRINGLIB_CHAR sign = '\0';
452 STRINGLIB_CHAR *p;
453 Py_ssize_t n_digits; /* count of digits need from the computed
454 string */
455 Py_ssize_t n_leading_chars;
Eric Smithcf537ff2008-05-11 19:52:48 +0000456 Py_ssize_t n_grouping_chars = 0; /* Count of additional chars to
457 allocate, used for 'n'
458 formatting. */
Eric Smitha9f7d622008-02-17 19:46:49 +0000459 NumberFieldWidths spec;
460 long x;
461
462 /* no precision allowed on integers */
463 if (format->precision != -1) {
464 PyErr_SetString(PyExc_ValueError,
465 "Precision not allowed in integer format specifier");
466 goto done;
467 }
468
469
470 /* special case for character formatting */
471 if (format->type == 'c') {
472 /* error to specify a sign */
473 if (format->sign != '\0') {
474 PyErr_SetString(PyExc_ValueError,
475 "Sign not allowed with integer"
476 " format specifier 'c'");
477 goto done;
478 }
479
480 /* taken from unicodeobject.c formatchar() */
481 /* Integer input truncated to a character */
482/* XXX: won't work for int */
483 x = PyLong_AsLong(value);
484 if (x == -1 && PyErr_Occurred())
485 goto done;
486#ifdef Py_UNICODE_WIDE
487 if (x < 0 || x > 0x10ffff) {
488 PyErr_SetString(PyExc_OverflowError,
489 "%c arg not in range(0x110000) "
490 "(wide Python build)");
491 goto done;
492 }
493#else
494 if (x < 0 || x > 0xffff) {
495 PyErr_SetString(PyExc_OverflowError,
496 "%c arg not in range(0x10000) "
497 "(narrow Python build)");
498 goto done;
499 }
500#endif
501 numeric_char = (STRINGLIB_CHAR)x;
502 pnumeric_chars = &numeric_char;
503 n_digits = 1;
504 }
505 else {
506 int base;
507 int leading_chars_to_skip; /* Number of characters added by
508 PyNumber_ToBase that we want to
509 skip over. */
510
511 /* Compute the base and how many characters will be added by
512 PyNumber_ToBase */
513 switch (format->type) {
514 case 'b':
515 base = 2;
516 leading_chars_to_skip = 2; /* 0b */
517 break;
518 case 'o':
519 base = 8;
520 leading_chars_to_skip = 2; /* 0o */
521 break;
522 case 'x':
523 case 'X':
524 base = 16;
525 leading_chars_to_skip = 2; /* 0x */
526 break;
527 default: /* shouldn't be needed, but stops a compiler warning */
528 case 'd':
Eric Smithcf537ff2008-05-11 19:52:48 +0000529 case 'n':
Eric Smitha9f7d622008-02-17 19:46:49 +0000530 base = 10;
531 leading_chars_to_skip = 0;
532 break;
533 }
534
535 /* Do the hard part, converting to a string in a given base */
536 tmp = tostring(value, base);
537 if (tmp == NULL)
538 goto done;
539
540 pnumeric_chars = STRINGLIB_STR(tmp);
541 n_digits = STRINGLIB_LEN(tmp);
542
543 /* Remember not to modify what pnumeric_chars points to. it
544 might be interned. Only modify it after we copy it into a
545 newly allocated output buffer. */
546
547 /* Is a sign character present in the output? If so, remember it
548 and skip it */
549 sign = pnumeric_chars[0];
550 if (sign == '-') {
551 ++leading_chars_to_skip;
552 }
553
554 /* Skip over the leading chars (0x, 0b, etc.) */
555 n_digits -= leading_chars_to_skip;
556 pnumeric_chars += leading_chars_to_skip;
557 }
558
559 /* Calculate the widths of the various leading and trailing parts */
560 calc_number_widths(&spec, sign, n_digits, format);
561
Eric Smithcf537ff2008-05-11 19:52:48 +0000562 if (format->type == 'n')
563 /* Compute how many additional chars we need to allocate
564 to hold the thousands grouping. */
565 STRINGLIB_GROUPING(pnumeric_chars, n_digits,
566 pnumeric_chars+n_digits,
567 0, &n_grouping_chars, 0);
568
Eric Smitha9f7d622008-02-17 19:46:49 +0000569 /* Allocate a new string to hold the result */
Eric Smithcf537ff2008-05-11 19:52:48 +0000570 result = STRINGLIB_NEW(NULL, spec.n_total + n_grouping_chars);
Eric Smitha9f7d622008-02-17 19:46:49 +0000571 if (!result)
572 goto done;
573 p = STRINGLIB_STR(result);
574
575 /* Fill in the digit parts */
576 n_leading_chars = spec.n_lpadding + spec.n_lsign + spec.n_spadding;
577 memmove(p + n_leading_chars,
578 pnumeric_chars,
579 n_digits * sizeof(STRINGLIB_CHAR));
580
Eric Smithcf537ff2008-05-11 19:52:48 +0000581 /* If type is 'X', convert to uppercase */
Eric Smitha9f7d622008-02-17 19:46:49 +0000582 if (format->type == 'X') {
583 Py_ssize_t t;
Eric Smith8a803dd2008-02-20 23:39:28 +0000584 for (t = 0; t < n_digits; ++t)
Eric Smitha9f7d622008-02-17 19:46:49 +0000585 p[t + n_leading_chars] = STRINGLIB_TOUPPER(p[t + n_leading_chars]);
586 }
587
Eric Smithcf537ff2008-05-11 19:52:48 +0000588 /* Insert the grouping, if any, after the uppercasing of 'X', so we can
589 ensure that grouping chars won't be affeted. */
590 if (n_grouping_chars && format->type == 'n') {
591 /* We know this can't fail, since we've already
592 reserved enough space. */
593 STRINGLIB_CHAR *pstart = p + n_leading_chars;
594 int r = STRINGLIB_GROUPING(pstart, n_digits,
595 pstart + n_digits,
596 spec.n_total+n_grouping_chars-n_leading_chars,
597 NULL, 0);
598 assert(r);
599 }
600
Eric Smitha9f7d622008-02-17 19:46:49 +0000601 /* Fill in the non-digit parts */
602 fill_number(p, &spec, n_digits,
603 format->fill_char == '\0' ? ' ' : format->fill_char);
604
605done:
606 Py_XDECREF(tmp);
607 return result;
608}
609#endif /* defined FORMAT_LONG || defined FORMAT_INT */
610
611/************************************************************************/
612/*********** float formatting *******************************************/
613/************************************************************************/
614
615#ifdef FORMAT_FLOAT
616#if STRINGLIB_IS_UNICODE
617/* taken from unicodeobject.c */
618static Py_ssize_t
619strtounicode(Py_UNICODE *buffer, const char *charbuffer)
620{
621 register Py_ssize_t i;
622 Py_ssize_t len = strlen(charbuffer);
Eric Smith8a803dd2008-02-20 23:39:28 +0000623 for (i = len - 1; i >= 0; --i)
Eric Smitha9f7d622008-02-17 19:46:49 +0000624 buffer[i] = (Py_UNICODE) charbuffer[i];
625
626 return len;
627}
628#endif
629
Eric Smitha9f7d622008-02-17 19:46:49 +0000630/* see FORMATBUFLEN in unicodeobject.c */
631#define FLOAT_FORMATBUFLEN 120
632
633/* much of this is taken from unicodeobject.c */
Eric Smitha9f7d622008-02-17 19:46:49 +0000634static PyObject *
Eric Smith8a803dd2008-02-20 23:39:28 +0000635format_float_internal(PyObject *value,
636 const InternalFormatSpec *format)
Eric Smitha9f7d622008-02-17 19:46:49 +0000637{
638 /* fmt = '%.' + `prec` + `type` + '%%'
639 worst case length = 2 + 10 (len of INT_MAX) + 1 + 2 = 15 (use 20)*/
640 char fmt[20];
641
642 /* taken from unicodeobject.c */
643 /* Worst case length calc to ensure no buffer overrun:
644
645 'g' formats:
646 fmt = %#.<prec>g
647 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
648 for any double rep.)
649 len = 1 + prec + 1 + 2 + 5 = 9 + prec
650
651 'f' formats:
652 buf = '-' + [0-9]*x + '.' + [0-9]*prec (with x < 50)
653 len = 1 + 50 + 1 + prec = 52 + prec
654
655 If prec=0 the effective precision is 1 (the leading digit is
656 always given), therefore increase the length by one.
657
658 */
659 char charbuf[FLOAT_FORMATBUFLEN];
660 Py_ssize_t n_digits;
661 double x;
662 Py_ssize_t precision = format->precision;
663 PyObject *result = NULL;
664 STRINGLIB_CHAR sign;
665 char* trailing = "";
666 STRINGLIB_CHAR *p;
667 NumberFieldWidths spec;
Eric Smith8a803dd2008-02-20 23:39:28 +0000668 STRINGLIB_CHAR type = format->type;
Eric Smitha9f7d622008-02-17 19:46:49 +0000669
670#if STRINGLIB_IS_UNICODE
671 Py_UNICODE unicodebuf[FLOAT_FORMATBUFLEN];
672#endif
673
674 /* first, do the conversion as 8-bit chars, using the platform's
675 snprintf. then, if needed, convert to unicode. */
676
677 /* 'F' is the same as 'f', per the PEP */
678 if (type == 'F')
679 type = 'f';
680
681 x = PyFloat_AsDouble(value);
682
683 if (x == -1.0 && PyErr_Occurred())
684 goto done;
685
686 if (type == '%') {
687 type = 'f';
688 x *= 100;
689 trailing = "%";
690 }
691
692 if (precision < 0)
693 precision = 6;
694 if (type == 'f' && (fabs(x) / 1e25) >= 1e25)
695 type = 'g';
696
697 /* cast "type", because if we're in unicode we need to pass a
698 8-bit char. this is safe, because we've restricted what "type"
699 can be */
700 PyOS_snprintf(fmt, sizeof(fmt), "%%.%" PY_FORMAT_SIZE_T "d%c", precision,
701 (char)type);
702
Eric Smith8a803dd2008-02-20 23:39:28 +0000703 /* do the actual formatting */
704 PyOS_ascii_formatd(charbuf, sizeof(charbuf), fmt, x);
Eric Smitha9f7d622008-02-17 19:46:49 +0000705
706 /* adding trailing to fmt with PyOS_snprintf doesn't work, not
707 sure why. we'll just concatentate it here, no harm done. we
708 know we can't have a buffer overflow from the fmt size
709 analysis */
710 strcat(charbuf, trailing);
711
712 /* rather than duplicate the code for snprintf for both unicode
713 and 8 bit strings, we just use the 8 bit version and then
714 convert to unicode in a separate code path. that's probably
715 the lesser of 2 evils. */
716#if STRINGLIB_IS_UNICODE
717 n_digits = strtounicode(unicodebuf, charbuf);
718 p = unicodebuf;
719#else
720 /* compute the length. I believe this is done because the return
721 value from snprintf above is unreliable */
722 n_digits = strlen(charbuf);
723 p = charbuf;
724#endif
725
726 /* is a sign character present in the output? if so, remember it
727 and skip it */
728 sign = p[0];
729 if (sign == '-') {
Eric Smith8a803dd2008-02-20 23:39:28 +0000730 ++p;
731 --n_digits;
Eric Smitha9f7d622008-02-17 19:46:49 +0000732 }
733
734 calc_number_widths(&spec, sign, n_digits, format);
735
736 /* allocate a string with enough space */
737 result = STRINGLIB_NEW(NULL, spec.n_total);
738 if (result == NULL)
739 goto done;
740
741 /* fill in the non-digit parts */
742 fill_number(STRINGLIB_STR(result), &spec, n_digits,
743 format->fill_char == '\0' ? ' ' : format->fill_char);
744
745 /* fill in the digit parts */
746 memmove(STRINGLIB_STR(result) +
747 (spec.n_lpadding + spec.n_lsign + spec.n_spadding),
748 p,
749 n_digits * sizeof(STRINGLIB_CHAR));
750
751done:
752 return result;
753}
Eric Smitha9f7d622008-02-17 19:46:49 +0000754#endif /* FORMAT_FLOAT */
755
756/************************************************************************/
757/*********** built in formatters ****************************************/
758/************************************************************************/
759#ifdef FORMAT_STRING
760PyObject *
761FORMAT_STRING(PyObject* value, PyObject* args)
762{
763 PyObject *format_spec;
764 PyObject *result = NULL;
765#if PY_VERSION_HEX < 0x03000000
766 PyObject *tmp = NULL;
767#endif
768 InternalFormatSpec format;
769
770 /* If 2.x, we accept either str or unicode, and try to convert it
771 to the right type. In 3.x, we insist on only unicode */
772#if PY_VERSION_HEX >= 0x03000000
773 if (!PyArg_ParseTuple(args, STRINGLIB_PARSE_CODE ":__format__",
774 &format_spec))
775 goto done;
776#else
777 /* If 2.x, convert format_spec to the same type as value */
778 /* This is to allow things like u''.format('') */
779 if (!PyArg_ParseTuple(args, "O:__format__", &format_spec))
780 goto done;
781 if (!(PyString_Check(format_spec) || PyUnicode_Check(format_spec))) {
782 PyErr_Format(PyExc_TypeError, "__format__ arg must be str "
783 "or unicode, not %s", Py_TYPE(format_spec)->tp_name);
784 goto done;
785 }
786 tmp = STRINGLIB_TOSTR(format_spec);
787 if (tmp == NULL)
788 goto done;
789 format_spec = tmp;
790#endif
791
792 /* check for the special case of zero length format spec, make
793 it equivalent to str(value) */
794 if (STRINGLIB_LEN(format_spec) == 0) {
795 result = STRINGLIB_TOSTR(value);
796 goto done;
797 }
798
799
800 /* parse the format_spec */
801 if (!parse_internal_render_format_spec(format_spec, &format, 's'))
802 goto done;
803
804 /* type conversion? */
805 switch (format.type) {
806 case 's':
807 /* no type conversion needed, already a string. do the formatting */
808 result = format_string_internal(value, &format);
809 break;
810 default:
811 /* unknown */
Martin v. Löwisd918e4e2008-04-07 03:08:28 +0000812 #if STRINGLIB_IS_UNICODE
813 /* If STRINGLIB_CHAR is Py_UNICODE, %c might be out-of-range,
814 hence the two cases. If it is char, gcc complains that the
815 condition below is always true, hence the ifdef. */
816 if (format.type > 32 && format.type <128)
817 #endif
818 PyErr_Format(PyExc_ValueError, "Unknown conversion type %c",
819 (char)format.type);
820 #if STRINGLIB_IS_UNICODE
821 else
822 PyErr_Format(PyExc_ValueError, "Unknown conversion type '\\x%x'",
823 (unsigned int)format.type);
824 #endif
Eric Smitha9f7d622008-02-17 19:46:49 +0000825 goto done;
826 }
827
828done:
829#if PY_VERSION_HEX < 0x03000000
830 Py_XDECREF(tmp);
831#endif
832 return result;
833}
834#endif /* FORMAT_STRING */
835
836#if defined FORMAT_LONG || defined FORMAT_INT
837static PyObject*
838format_int_or_long(PyObject* value, PyObject* args, IntOrLongToString tostring)
839{
840 PyObject *format_spec;
841 PyObject *result = NULL;
842 PyObject *tmp = NULL;
843 InternalFormatSpec format;
844
845 if (!PyArg_ParseTuple(args, STRINGLIB_PARSE_CODE ":__format__",
846 &format_spec))
847 goto done;
848
849 /* check for the special case of zero length format spec, make
850 it equivalent to str(value) */
851 if (STRINGLIB_LEN(format_spec) == 0) {
852 result = STRINGLIB_TOSTR(value);
853 goto done;
854 }
855
856 /* parse the format_spec */
857 if (!parse_internal_render_format_spec(format_spec, &format, 'd'))
858 goto done;
859
860 /* type conversion? */
861 switch (format.type) {
862 case 'b':
863 case 'c':
864 case 'd':
865 case 'o':
866 case 'x':
867 case 'X':
Eric Smithcf537ff2008-05-11 19:52:48 +0000868 case 'n':
Eric Smitha9f7d622008-02-17 19:46:49 +0000869 /* no type conversion needed, already an int (or long). do
870 the formatting */
871 result = format_int_or_long_internal(value, &format, tostring);
872 break;
873
874 case 'e':
875 case 'E':
876 case 'f':
877 case 'F':
878 case 'g':
879 case 'G':
Eric Smitha9f7d622008-02-17 19:46:49 +0000880 case '%':
881 /* convert to float */
882 tmp = PyNumber_Float(value);
883 if (tmp == NULL)
884 goto done;
885 result = format_float_internal(value, &format);
886 break;
887
888 default:
889 /* unknown */
890 PyErr_Format(PyExc_ValueError, "Unknown conversion type %c",
891 format.type);
892 goto done;
893 }
894
895done:
896 Py_XDECREF(tmp);
897 return result;
898}
899#endif /* FORMAT_LONG || defined FORMAT_INT */
900
901#ifdef FORMAT_LONG
902/* Need to define long_format as a function that will convert a long
903 to a string. In 3.0, _PyLong_Format has the correct signature. In
904 2.x, we need to fudge a few parameters */
905#if PY_VERSION_HEX >= 0x03000000
906#define long_format _PyLong_Format
907#else
908static PyObject*
909long_format(PyObject* value, int base)
910{
911 /* Convert to base, don't add trailing 'L', and use the new octal
912 format. We already know this is a long object */
913 assert(PyLong_Check(value));
914 /* convert to base, don't add 'L', and use the new octal format */
915 return _PyLong_Format(value, base, 0, 1);
916}
917#endif
918
919PyObject *
920FORMAT_LONG(PyObject* value, PyObject* args)
921{
922 return format_int_or_long(value, args, long_format);
923}
924#endif /* FORMAT_LONG */
925
926#ifdef FORMAT_INT
927/* this is only used for 2.x, not 3.0 */
928static PyObject*
929int_format(PyObject* value, int base)
930{
931 /* Convert to base, and use the new octal format. We already
932 know this is an int object */
933 assert(PyInt_Check(value));
934 return _PyInt_Format((PyIntObject*)value, base, 1);
935}
936
937PyObject *
938FORMAT_INT(PyObject* value, PyObject* args)
939{
940 return format_int_or_long(value, args, int_format);
941}
942#endif /* FORMAT_INT */
943
944#ifdef FORMAT_FLOAT
945PyObject *
946FORMAT_FLOAT(PyObject *value, PyObject *args)
947{
948 PyObject *format_spec;
949 PyObject *result = NULL;
950 InternalFormatSpec format;
951
952 if (!PyArg_ParseTuple(args, STRINGLIB_PARSE_CODE ":__format__", &format_spec))
953 goto done;
954
955 /* check for the special case of zero length format spec, make
956 it equivalent to str(value) */
957 if (STRINGLIB_LEN(format_spec) == 0) {
958 result = STRINGLIB_TOSTR(value);
959 goto done;
960 }
961
962 /* parse the format_spec */
Eric Smith8113ca62008-03-17 11:01:01 +0000963 if (!parse_internal_render_format_spec(format_spec, &format, '\0'))
Eric Smitha9f7d622008-02-17 19:46:49 +0000964 goto done;
965
966 /* type conversion? */
967 switch (format.type) {
Eric Smith8113ca62008-03-17 11:01:01 +0000968 case '\0':
969 /* 'Z' means like 'g', but with at least one decimal. See
970 PyOS_ascii_formatd */
971 format.type = 'Z';
972 /* Deliberate fall through to the next case statement */
Eric Smitha9f7d622008-02-17 19:46:49 +0000973 case 'e':
974 case 'E':
975 case 'f':
976 case 'F':
977 case 'g':
978 case 'G':
979 case 'n':
980 case '%':
981 /* no conversion, already a float. do the formatting */
982 result = format_float_internal(value, &format);
983 break;
984
985 default:
986 /* unknown */
987 PyErr_Format(PyExc_ValueError, "Unknown conversion type %c",
988 format.type);
989 goto done;
990 }
991
992done:
993 return result;
994}
995#endif /* FORMAT_FLOAT */