blob: 39da6b3103adec5eae7d509466a01598b889d8c5 [file] [log] [blame]
/* implements the string, int, long, and float formatters.  that is,
   string.__format__, etc. */

/* Before including this, you must include either:
   stringlib/unicodedefs.h
   stringlib/stringdefs.h

   Also, you should define the names:
   FORMAT_STRING
   FORMAT_LONG
   FORMAT_INT (2.x only)
   FORMAT_FLOAT
   to be whatever you want the public names of these functions to
   be.  These are the only non-static functions defined here.
*/
15
Eric Smithb7f5ba12007-08-29 12:38:45 +000016#define ALLOW_PARENS_FOR_SIGN 0
17
Eric Smith8c663262007-08-25 02:26:07 +000018/*
19 get_integer consumes 0 or more decimal digit characters from an
20 input string, updates *result with the corresponding positive
21 integer, and returns the number of digits consumed.
22
23 returns -1 on error.
24*/
25static int
26get_integer(STRINGLIB_CHAR **ptr, STRINGLIB_CHAR *end,
27 Py_ssize_t *result)
28{
29 Py_ssize_t accumulator, digitval, oldaccumulator;
30 int numdigits;
31 accumulator = numdigits = 0;
32 for (;;(*ptr)++, numdigits++) {
33 if (*ptr >= end)
34 break;
35 digitval = STRINGLIB_TODECIMAL(**ptr);
36 if (digitval < 0)
37 break;
38 /*
39 This trick was copied from old Unicode format code. It's cute,
40 but would really suck on an old machine with a slow divide
41 implementation. Fortunately, in the normal case we do not
42 expect too many digits.
43 */
44 oldaccumulator = accumulator;
45 accumulator *= 10;
46 if ((accumulator+10)/10 != oldaccumulator+1) {
47 PyErr_Format(PyExc_ValueError,
48 "Too many decimal digits in format string");
49 return -1;
50 }
51 accumulator += digitval;
52 }
53 *result = accumulator;
54 return numdigits;
55}
56
57/************************************************************************/
58/*********** standard format specifier parsing **************************/
59/************************************************************************/
60
61/* returns true if this character is a specifier alignment token */
62Py_LOCAL_INLINE(int)
63is_alignment_token(STRINGLIB_CHAR c)
64{
65 switch (c) {
66 case '<': case '>': case '=': case '^':
67 return 1;
68 default:
69 return 0;
70 }
71}
72
73/* returns true if this character is a sign element */
74Py_LOCAL_INLINE(int)
75is_sign_element(STRINGLIB_CHAR c)
76{
77 switch (c) {
Eric Smithb7f5ba12007-08-29 12:38:45 +000078 case ' ': case '+': case '-':
Eric Smith44300952007-08-29 12:43:12 +000079#if ALLOW_PARENS_FOR_SIGN
Eric Smithb7f5ba12007-08-29 12:38:45 +000080 case '(':
Eric Smith44300952007-08-29 12:43:12 +000081#endif
Eric Smith8c663262007-08-25 02:26:07 +000082 return 1;
83 default:
84 return 0;
85 }
86}
87
88
89typedef struct {
90 STRINGLIB_CHAR fill_char;
91 STRINGLIB_CHAR align;
92 STRINGLIB_CHAR sign;
93 Py_ssize_t width;
94 Py_ssize_t precision;
95 STRINGLIB_CHAR type;
96} InternalFormatSpec;
97
98/*
99 ptr points to the start of the format_spec, end points just past its end.
100 fills in format with the parsed information.
101 returns 1 on success, 0 on failure.
102 if failure, sets the exception
103*/
104static int
105parse_internal_render_format_spec(PyObject *format_spec,
106 InternalFormatSpec *format,
107 char default_type)
108{
109 STRINGLIB_CHAR *ptr = STRINGLIB_STR(format_spec);
110 STRINGLIB_CHAR *end = ptr + STRINGLIB_LEN(format_spec);
111
112 /* end-ptr is used throughout this code to specify the length of
113 the input string */
114
115 Py_ssize_t specified_width;
116
117 format->fill_char = '\0';
118 format->align = '\0';
119 format->sign = '\0';
120 format->width = -1;
121 format->precision = -1;
122 format->type = default_type;
123
124 /* If the second char is an alignment token,
125 then parse the fill char */
126 if (end-ptr >= 2 && is_alignment_token(ptr[1])) {
127 format->align = ptr[1];
128 format->fill_char = ptr[0];
129 ptr += 2;
Eric Smith0cb431c2007-08-28 01:07:27 +0000130 }
131 else if (end-ptr >= 1 && is_alignment_token(ptr[0])) {
Eric Smith8c663262007-08-25 02:26:07 +0000132 format->align = ptr[0];
133 ptr++;
134 }
135
136 /* Parse the various sign options */
137 if (end-ptr >= 1 && is_sign_element(ptr[0])) {
138 format->sign = ptr[0];
139 ptr++;
Eric Smithb7f5ba12007-08-29 12:38:45 +0000140#if ALLOW_PARENS_FOR_SIGN
Eric Smith8c663262007-08-25 02:26:07 +0000141 if (end-ptr >= 1 && ptr[0] == ')') {
142 ptr++;
143 }
Eric Smithb7f5ba12007-08-29 12:38:45 +0000144#endif
Eric Smith8c663262007-08-25 02:26:07 +0000145 }
146
147 /* The special case for 0-padding (backwards compat) */
Eric Smith185e30c2007-08-30 22:23:08 +0000148 if (format->fill_char == '\0' && end-ptr >= 1 && ptr[0] == '0') {
Eric Smith8c663262007-08-25 02:26:07 +0000149 format->fill_char = '0';
150 if (format->align == '\0') {
151 format->align = '=';
152 }
153 ptr++;
154 }
155
156 /* XXX add error checking */
157 specified_width = get_integer(&ptr, end, &format->width);
158
159 /* if specified_width is 0, we didn't consume any characters for
160 the width. in that case, reset the width to -1, because
161 get_integer() will have set it to zero */
162 if (specified_width == 0) {
163 format->width = -1;
164 }
165
166 /* Parse field precision */
167 if (end-ptr && ptr[0] == '.') {
168 ptr++;
169
170 /* XXX add error checking */
171 specified_width = get_integer(&ptr, end, &format->precision);
172
173 /* not having a precision after a dot is an error */
174 if (specified_width == 0) {
175 PyErr_Format(PyExc_ValueError,
176 "Format specifier missing precision");
177 return 0;
178 }
179
180 }
181
182 /* Finally, parse the type field */
183
184 if (end-ptr > 1) {
185 /* invalid conversion spec */
186 PyErr_Format(PyExc_ValueError, "Invalid conversion specification");
187 return 0;
188 }
189
190 if (end-ptr == 1) {
191 format->type = ptr[0];
192 ptr++;
193 }
194
195 return 1;
196}
197
Eric Smith8fd3eba2008-02-17 19:48:00 +0000198#if defined FORMAT_FLOAT || defined FORMAT_LONG
Eric Smith8c663262007-08-25 02:26:07 +0000199/************************************************************************/
200/*********** common routines for numeric formatting *********************/
201/************************************************************************/
202
203/* describes the layout for an integer, see the comment in
204 _calc_integer_widths() for details */
205typedef struct {
206 Py_ssize_t n_lpadding;
207 Py_ssize_t n_spadding;
208 Py_ssize_t n_rpadding;
209 char lsign;
210 Py_ssize_t n_lsign;
211 char rsign;
212 Py_ssize_t n_rsign;
213 Py_ssize_t n_total; /* just a convenience, it's derivable from the
214 other fields */
215} NumberFieldWidths;
216
217/* not all fields of format are used. for example, precision is
218 unused. should this take discrete params in order to be more clear
219 about what it does? or is passing a single format parameter easier
220 and more efficient enough to justify a little obfuscation? */
221static void
222calc_number_widths(NumberFieldWidths *r, STRINGLIB_CHAR actual_sign,
223 Py_ssize_t n_digits, const InternalFormatSpec *format)
224{
225 r->n_lpadding = 0;
226 r->n_spadding = 0;
227 r->n_rpadding = 0;
228 r->lsign = '\0';
229 r->n_lsign = 0;
230 r->rsign = '\0';
231 r->n_rsign = 0;
232
233 /* the output will look like:
234 | |
235 | <lpadding> <lsign> <spadding> <digits> <rsign> <rpadding> |
236 | |
237
238 lsign and rsign are computed from format->sign and the actual
239 sign of the number
240
241 digits is already known
242
243 the total width is either given, or computed from the
244 actual digits
245
246 only one of lpadding, spadding, and rpadding can be non-zero,
247 and it's calculated from the width and other fields
248 */
249
250 /* compute the various parts we're going to write */
251 if (format->sign == '+') {
252 /* always put a + or - */
253 r->n_lsign = 1;
254 r->lsign = (actual_sign == '-' ? '-' : '+');
Eric Smith0cb431c2007-08-28 01:07:27 +0000255 }
Eric Smithb7f5ba12007-08-29 12:38:45 +0000256#if ALLOW_PARENS_FOR_SIGN
Eric Smith0cb431c2007-08-28 01:07:27 +0000257 else if (format->sign == '(') {
Eric Smith8c663262007-08-25 02:26:07 +0000258 if (actual_sign == '-') {
259 r->n_lsign = 1;
260 r->lsign = '(';
261 r->n_rsign = 1;
262 r->rsign = ')';
263 }
Eric Smith0cb431c2007-08-28 01:07:27 +0000264 }
Eric Smithb7f5ba12007-08-29 12:38:45 +0000265#endif
Eric Smith0cb431c2007-08-28 01:07:27 +0000266 else if (format->sign == ' ') {
Eric Smith8c663262007-08-25 02:26:07 +0000267 r->n_lsign = 1;
268 r->lsign = (actual_sign == '-' ? '-' : ' ');
Eric Smith0cb431c2007-08-28 01:07:27 +0000269 }
270 else {
Eric Smith8c663262007-08-25 02:26:07 +0000271 /* non specified, or the default (-) */
272 if (actual_sign == '-') {
273 r->n_lsign = 1;
274 r->lsign = '-';
275 }
276 }
277
278 /* now the number of padding characters */
279 if (format->width == -1) {
280 /* no padding at all, nothing to do */
Eric Smith0cb431c2007-08-28 01:07:27 +0000281 }
282 else {
Eric Smith8c663262007-08-25 02:26:07 +0000283 /* see if any padding is needed */
284 if (r->n_lsign + n_digits + r->n_rsign >= format->width) {
285 /* no padding needed, we're already bigger than the
286 requested width */
Eric Smith0cb431c2007-08-28 01:07:27 +0000287 }
288 else {
Eric Smith8c663262007-08-25 02:26:07 +0000289 /* determine which of left, space, or right padding is
290 needed */
Eric Smith8fd3eba2008-02-17 19:48:00 +0000291 Py_ssize_t padding = format->width -
292 (r->n_lsign + n_digits + r->n_rsign);
Eric Smith8c663262007-08-25 02:26:07 +0000293 if (format->align == '<')
294 r->n_rpadding = padding;
295 else if (format->align == '>')
296 r->n_lpadding = padding;
297 else if (format->align == '^') {
298 r->n_lpadding = padding / 2;
299 r->n_rpadding = padding - r->n_lpadding;
Eric Smith0cb431c2007-08-28 01:07:27 +0000300 }
Eric Smith185e30c2007-08-30 22:23:08 +0000301 else if (format->align == '=')
Eric Smith8c663262007-08-25 02:26:07 +0000302 r->n_spadding = padding;
Eric Smith185e30c2007-08-30 22:23:08 +0000303 else
304 r->n_lpadding = padding;
Eric Smith8c663262007-08-25 02:26:07 +0000305 }
306 }
307 r->n_total = r->n_lpadding + r->n_lsign + r->n_spadding +
308 n_digits + r->n_rsign + r->n_rpadding;
309}
310
311/* fill in the non-digit parts of a numbers's string representation,
312 as determined in _calc_integer_widths(). returns the pointer to
313 where the digits go. */
314static STRINGLIB_CHAR *
315fill_number(STRINGLIB_CHAR *p_buf, const NumberFieldWidths *spec,
316 Py_ssize_t n_digits, STRINGLIB_CHAR fill_char)
317{
318 STRINGLIB_CHAR* p_digits;
319
320 if (spec->n_lpadding) {
321 STRINGLIB_FILL(p_buf, fill_char, spec->n_lpadding);
322 p_buf += spec->n_lpadding;
323 }
324 if (spec->n_lsign == 1) {
325 *p_buf++ = spec->lsign;
326 }
327 if (spec->n_spadding) {
328 STRINGLIB_FILL(p_buf, fill_char, spec->n_spadding);
329 p_buf += spec->n_spadding;
330 }
331 p_digits = p_buf;
332 p_buf += n_digits;
333 if (spec->n_rsign == 1) {
334 *p_buf++ = spec->rsign;
335 }
336 if (spec->n_rpadding) {
337 STRINGLIB_FILL(p_buf, fill_char, spec->n_rpadding);
338 p_buf += spec->n_rpadding;
339 }
340 return p_digits;
341}
Eric Smith8fd3eba2008-02-17 19:48:00 +0000342#endif /* FORMAT_FLOAT || FORMAT_LONG */
Eric Smith8c663262007-08-25 02:26:07 +0000343
344/************************************************************************/
345/*********** string formatting ******************************************/
346/************************************************************************/
347
348static PyObject *
349format_string_internal(PyObject *value, const InternalFormatSpec *format)
350{
351 Py_ssize_t width; /* total field width */
352 Py_ssize_t lpad;
353 STRINGLIB_CHAR *dst;
354 STRINGLIB_CHAR *src = STRINGLIB_STR(value);
355 Py_ssize_t len = STRINGLIB_LEN(value);
356 PyObject *result = NULL;
357
358 /* sign is not allowed on strings */
359 if (format->sign != '\0') {
360 PyErr_SetString(PyExc_ValueError,
361 "Sign not allowed in string format specifier");
362 goto done;
363 }
364
365 /* '=' alignment not allowed on strings */
366 if (format->align == '=') {
367 PyErr_SetString(PyExc_ValueError,
368 "'=' alignment not allowed "
369 "in string format specifier");
370 goto done;
371 }
372
373 /* if precision is specified, output no more that format.precision
374 characters */
375 if (format->precision >= 0 && len >= format->precision) {
376 len = format->precision;
377 }
378
379 if (format->width >= 0) {
380 width = format->width;
381
382 /* but use at least len characters */
383 if (len > width) {
384 width = len;
385 }
Eric Smith0cb431c2007-08-28 01:07:27 +0000386 }
387 else {
Eric Smith8c663262007-08-25 02:26:07 +0000388 /* not specified, use all of the chars and no more */
389 width = len;
390 }
391
392 /* allocate the resulting string */
393 result = STRINGLIB_NEW(NULL, width);
394 if (result == NULL)
395 goto done;
396
397 /* now write into that space */
398 dst = STRINGLIB_STR(result);
399
400 /* figure out how much leading space we need, based on the
401 aligning */
402 if (format->align == '>')
403 lpad = width - len;
404 else if (format->align == '^')
405 lpad = (width - len) / 2;
406 else
407 lpad = 0;
408
409 /* if right aligning, increment the destination allow space on the
410 left */
411 memcpy(dst + lpad, src, len * sizeof(STRINGLIB_CHAR));
412
413 /* do any padding */
414 if (width > len) {
415 STRINGLIB_CHAR fill_char = format->fill_char;
416 if (fill_char == '\0') {
417 /* use the default, if not specified */
418 fill_char = ' ';
419 }
420
421 /* pad on left */
422 if (lpad)
423 STRINGLIB_FILL(dst, fill_char, lpad);
424
425 /* pad on right */
426 if (width - len - lpad)
427 STRINGLIB_FILL(dst + len + lpad, fill_char, width - len - lpad);
428 }
429
430done:
431 return result;
432}
433
434
435/************************************************************************/
436/*********** long formatting ********************************************/
437/************************************************************************/
438
Eric Smith8fd3eba2008-02-17 19:48:00 +0000439#if defined FORMAT_LONG || defined FORMAT_INT
440typedef PyObject*
441(*IntOrLongToString)(PyObject *value, int base);
442
Eric Smith8c663262007-08-25 02:26:07 +0000443static PyObject *
Eric Smith8fd3eba2008-02-17 19:48:00 +0000444format_int_or_long_internal(PyObject *value, const InternalFormatSpec *format,
445 IntOrLongToString tostring)
Eric Smith8c663262007-08-25 02:26:07 +0000446{
447 PyObject *result = NULL;
Eric Smith8fd3eba2008-02-17 19:48:00 +0000448 PyObject *tmp = NULL;
449 STRINGLIB_CHAR *pnumeric_chars;
450 STRINGLIB_CHAR numeric_char;
Eric Smith8c663262007-08-25 02:26:07 +0000451 STRINGLIB_CHAR sign = '\0';
452 STRINGLIB_CHAR *p;
453 Py_ssize_t n_digits; /* count of digits need from the computed
454 string */
Eric Smith8fd3eba2008-02-17 19:48:00 +0000455 Py_ssize_t n_leading_chars;
Eric Smith8c663262007-08-25 02:26:07 +0000456 NumberFieldWidths spec;
457 long x;
458
459 /* no precision allowed on integers */
460 if (format->precision != -1) {
461 PyErr_SetString(PyExc_ValueError,
462 "Precision not allowed in integer format specifier");
463 goto done;
464 }
465
466
467 /* special case for character formatting */
468 if (format->type == 'c') {
469 /* error to specify a sign */
470 if (format->sign != '\0') {
471 PyErr_SetString(PyExc_ValueError,
472 "Sign not allowed with integer"
473 " format specifier 'c'");
474 goto done;
475 }
476
477 /* taken from unicodeobject.c formatchar() */
478 /* Integer input truncated to a character */
Eric Smith8fd3eba2008-02-17 19:48:00 +0000479/* XXX: won't work for int */
Christian Heimes217cfd12007-12-02 14:31:20 +0000480 x = PyLong_AsLong(value);
Eric Smith8c663262007-08-25 02:26:07 +0000481 if (x == -1 && PyErr_Occurred())
482 goto done;
483#ifdef Py_UNICODE_WIDE
484 if (x < 0 || x > 0x10ffff) {
485 PyErr_SetString(PyExc_OverflowError,
486 "%c arg not in range(0x110000) "
487 "(wide Python build)");
488 goto done;
489 }
490#else
491 if (x < 0 || x > 0xffff) {
492 PyErr_SetString(PyExc_OverflowError,
493 "%c arg not in range(0x10000) "
494 "(narrow Python build)");
495 goto done;
496 }
497#endif
Eric Smith8fd3eba2008-02-17 19:48:00 +0000498 numeric_char = (STRINGLIB_CHAR)x;
499 pnumeric_chars = &numeric_char;
500 n_digits = 1;
Eric Smith0cb431c2007-08-28 01:07:27 +0000501 }
502 else {
Eric Smith8c663262007-08-25 02:26:07 +0000503 int base;
Eric Smith8fd3eba2008-02-17 19:48:00 +0000504 int leading_chars_to_skip; /* Number of characters added by
505 PyNumber_ToBase that we want to
506 skip over. */
507
508 /* Compute the base and how many characters will be added by
Eric Smith8c663262007-08-25 02:26:07 +0000509 PyNumber_ToBase */
510 switch (format->type) {
511 case 'b':
512 base = 2;
Eric Smith8fd3eba2008-02-17 19:48:00 +0000513 leading_chars_to_skip = 2; /* 0b */
Eric Smith8c663262007-08-25 02:26:07 +0000514 break;
515 case 'o':
516 base = 8;
Eric Smith8fd3eba2008-02-17 19:48:00 +0000517 leading_chars_to_skip = 2; /* 0o */
Eric Smith8c663262007-08-25 02:26:07 +0000518 break;
519 case 'x':
520 case 'X':
521 base = 16;
Eric Smith8fd3eba2008-02-17 19:48:00 +0000522 leading_chars_to_skip = 2; /* 0x */
Eric Smith8c663262007-08-25 02:26:07 +0000523 break;
524 default: /* shouldn't be needed, but stops a compiler warning */
525 case 'd':
526 base = 10;
Eric Smith8fd3eba2008-02-17 19:48:00 +0000527 leading_chars_to_skip = 0;
Eric Smith8c663262007-08-25 02:26:07 +0000528 break;
529 }
530
Eric Smith8fd3eba2008-02-17 19:48:00 +0000531 /* Do the hard part, converting to a string in a given base */
532 tmp = tostring(value, base);
533 if (tmp == NULL)
Eric Smith8c663262007-08-25 02:26:07 +0000534 goto done;
535
Eric Smith8fd3eba2008-02-17 19:48:00 +0000536 pnumeric_chars = STRINGLIB_STR(tmp);
537 n_digits = STRINGLIB_LEN(tmp);
Eric Smith8c663262007-08-25 02:26:07 +0000538
Eric Smith8fd3eba2008-02-17 19:48:00 +0000539 /* Remember not to modify what pnumeric_chars points to. it
540 might be interned. Only modify it after we copy it into a
541 newly allocated output buffer. */
Eric Smith8c663262007-08-25 02:26:07 +0000542
Eric Smith8fd3eba2008-02-17 19:48:00 +0000543 /* Is a sign character present in the output? If so, remember it
Eric Smith8c663262007-08-25 02:26:07 +0000544 and skip it */
Eric Smith8fd3eba2008-02-17 19:48:00 +0000545 sign = pnumeric_chars[0];
Eric Smith8c663262007-08-25 02:26:07 +0000546 if (sign == '-') {
Eric Smith8fd3eba2008-02-17 19:48:00 +0000547 ++leading_chars_to_skip;
Eric Smith8c663262007-08-25 02:26:07 +0000548 }
549
Eric Smith8fd3eba2008-02-17 19:48:00 +0000550 /* Skip over the leading chars (0x, 0b, etc.) */
551 n_digits -= leading_chars_to_skip;
552 pnumeric_chars += leading_chars_to_skip;
Eric Smith8c663262007-08-25 02:26:07 +0000553 }
554
Eric Smith8fd3eba2008-02-17 19:48:00 +0000555 /* Calculate the widths of the various leading and trailing parts */
Eric Smith8c663262007-08-25 02:26:07 +0000556 calc_number_widths(&spec, sign, n_digits, format);
557
Eric Smith8fd3eba2008-02-17 19:48:00 +0000558 /* Allocate a new string to hold the result */
559 result = STRINGLIB_NEW(NULL, spec.n_total);
560 if (!result)
561 goto done;
562 p = STRINGLIB_STR(result);
Eric Smith8c663262007-08-25 02:26:07 +0000563
Eric Smith8fd3eba2008-02-17 19:48:00 +0000564 /* Fill in the digit parts */
565 n_leading_chars = spec.n_lpadding + spec.n_lsign + spec.n_spadding;
566 memmove(p + n_leading_chars,
567 pnumeric_chars,
568 n_digits * sizeof(STRINGLIB_CHAR));
569
570 /* if X, convert to uppercase */
571 if (format->type == 'X') {
572 Py_ssize_t t;
573 for (t = 0; t < n_digits; t++)
574 p[t + n_leading_chars] = STRINGLIB_TOUPPER(p[t + n_leading_chars]);
Eric Smith8c663262007-08-25 02:26:07 +0000575 }
576
Eric Smith8fd3eba2008-02-17 19:48:00 +0000577 /* Fill in the non-digit parts */
Eric Smith8c663262007-08-25 02:26:07 +0000578 fill_number(p, &spec, n_digits,
579 format->fill_char == '\0' ? ' ' : format->fill_char);
580
Eric Smith8c663262007-08-25 02:26:07 +0000581done:
Eric Smith8fd3eba2008-02-17 19:48:00 +0000582 Py_XDECREF(tmp);
Eric Smith8c663262007-08-25 02:26:07 +0000583 return result;
584}
Eric Smith8fd3eba2008-02-17 19:48:00 +0000585#endif /* defined FORMAT_LONG || defined FORMAT_INT */
Eric Smith8c663262007-08-25 02:26:07 +0000586
587/************************************************************************/
588/*********** float formatting *******************************************/
589/************************************************************************/
590
Eric Smith8fd3eba2008-02-17 19:48:00 +0000591#ifdef FORMAT_FLOAT
592#if STRINGLIB_IS_UNICODE
Eric Smith8c663262007-08-25 02:26:07 +0000593/* taken from unicodeobject.c */
594static Py_ssize_t
595strtounicode(Py_UNICODE *buffer, const char *charbuffer)
596{
597 register Py_ssize_t i;
598 Py_ssize_t len = strlen(charbuffer);
599 for (i = len - 1; i >= 0; i--)
Eric Smith185e30c2007-08-30 22:23:08 +0000600 buffer[i] = (Py_UNICODE) charbuffer[i];
Eric Smith8c663262007-08-25 02:26:07 +0000601
602 return len;
603}
Eric Smith8fd3eba2008-02-17 19:48:00 +0000604#endif
Eric Smith8c663262007-08-25 02:26:07 +0000605
/* the type of the callback that does the actual float formatting:
   it renders d into buffer (of size buf_len) as directed by format.
   it matches the definition of PyOS_ascii_formatd */
typedef char *(*DoubleSnprintfFunction)(char *buffer, size_t buf_len,
                                        const char *format, double d);
611
/* just a wrapper to make PyOS_snprintf look like DoubleSnprintfFunction.
   returns buffer, so that this wrapper and PyOS_ascii_formatd can be
   used interchangeably as callbacks: a NULL return would read as a
   failed conversion to anyone who checks the result */
static char*
snprintf_double(char *buffer, size_t buf_len, const char *format, double d)
{
    PyOS_snprintf(buffer, buf_len, format, d);
    return buffer;
}
619
620/* see FORMATBUFLEN in unicodeobject.c */
621#define FLOAT_FORMATBUFLEN 120
622
623/* much of this is taken from unicodeobject.c */
624/* use type instead of format->type, so that it can be overridden by
625 format_number() */
626static PyObject *
627_format_float(STRINGLIB_CHAR type, PyObject *value,
628 const InternalFormatSpec *format,
629 DoubleSnprintfFunction snprintf)
630{
631 /* fmt = '%.' + `prec` + `type` + '%%'
632 worst case length = 2 + 10 (len of INT_MAX) + 1 + 2 = 15 (use 20)*/
633 char fmt[20];
634
635 /* taken from unicodeobject.c */
636 /* Worst case length calc to ensure no buffer overrun:
637
638 'g' formats:
Eric Smith185e30c2007-08-30 22:23:08 +0000639 fmt = %#.<prec>g
640 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
641 for any double rep.)
642 len = 1 + prec + 1 + 2 + 5 = 9 + prec
Eric Smith8c663262007-08-25 02:26:07 +0000643
644 'f' formats:
Eric Smith185e30c2007-08-30 22:23:08 +0000645 buf = '-' + [0-9]*x + '.' + [0-9]*prec (with x < 50)
646 len = 1 + 50 + 1 + prec = 52 + prec
Eric Smith8c663262007-08-25 02:26:07 +0000647
648 If prec=0 the effective precision is 1 (the leading digit is
649 always given), therefore increase the length by one.
650
651 */
652 char charbuf[FLOAT_FORMATBUFLEN];
653 Py_ssize_t n_digits;
654 double x;
655 Py_ssize_t precision = format->precision;
656 PyObject *result = NULL;
657 STRINGLIB_CHAR sign;
658 char* trailing = "";
659 STRINGLIB_CHAR *p;
660 NumberFieldWidths spec;
661
662#if STRINGLIB_IS_UNICODE
663 Py_UNICODE unicodebuf[FLOAT_FORMATBUFLEN];
664#endif
665
666 /* first, do the conversion as 8-bit chars, using the platform's
667 snprintf. then, if needed, convert to unicode. */
668
669 /* 'F' is the same as 'f', per the PEP */
670 if (type == 'F')
671 type = 'f';
672
673 x = PyFloat_AsDouble(value);
674
675 if (x == -1.0 && PyErr_Occurred())
Eric Smith185e30c2007-08-30 22:23:08 +0000676 goto done;
Eric Smith8c663262007-08-25 02:26:07 +0000677
678 if (type == '%') {
679 type = 'f';
680 x *= 100;
681 trailing = "%";
682 }
683
684 if (precision < 0)
Eric Smith185e30c2007-08-30 22:23:08 +0000685 precision = 6;
Eric Smith8c663262007-08-25 02:26:07 +0000686 if (type == 'f' && (fabs(x) / 1e25) >= 1e25)
Eric Smith185e30c2007-08-30 22:23:08 +0000687 type = 'g';
Eric Smith8c663262007-08-25 02:26:07 +0000688
689 /* cast "type", because if we're in unicode we need to pass a
690 8-bit char. this is safe, because we've restricted what "type"
691 can be */
Eric Smith8fd3eba2008-02-17 19:48:00 +0000692 PyOS_snprintf(fmt, sizeof(fmt), "%%.%" PY_FORMAT_SIZE_T "d%c", precision,
693 (char)type);
Eric Smith8c663262007-08-25 02:26:07 +0000694
695 /* call the passed in function to do the actual formatting */
696 snprintf(charbuf, sizeof(charbuf), fmt, x);
697
698 /* adding trailing to fmt with PyOS_snprintf doesn't work, not
699 sure why. we'll just concatentate it here, no harm done. we
700 know we can't have a buffer overflow from the fmt size
701 analysis */
702 strcat(charbuf, trailing);
703
704 /* rather than duplicate the code for snprintf for both unicode
705 and 8 bit strings, we just use the 8 bit version and then
706 convert to unicode in a separate code path. that's probably
707 the lesser of 2 evils. */
708#if STRINGLIB_IS_UNICODE
709 n_digits = strtounicode(unicodebuf, charbuf);
710 p = unicodebuf;
711#else
712 /* compute the length. I believe this is done because the return
713 value from snprintf above is unreliable */
714 n_digits = strlen(charbuf);
715 p = charbuf;
716#endif
717
718 /* is a sign character present in the output? if so, remember it
719 and skip it */
720 sign = p[0];
721 if (sign == '-') {
722 p++;
723 n_digits--;
724 }
725
726 calc_number_widths(&spec, sign, n_digits, format);
727
728 /* allocate a string with enough space */
729 result = STRINGLIB_NEW(NULL, spec.n_total);
730 if (result == NULL)
731 goto done;
732
733 /* fill in the non-digit parts */
734 fill_number(STRINGLIB_STR(result), &spec, n_digits,
735 format->fill_char == '\0' ? ' ' : format->fill_char);
736
737 /* fill in the digit parts */
Eric Smith8fd3eba2008-02-17 19:48:00 +0000738 memmove(STRINGLIB_STR(result) +
739 (spec.n_lpadding + spec.n_lsign + spec.n_spadding),
Eric Smith8c663262007-08-25 02:26:07 +0000740 p,
741 n_digits * sizeof(STRINGLIB_CHAR));
742
743done:
744 return result;
745}
746
747static PyObject *
748format_float_internal(PyObject *value, const InternalFormatSpec *format)
749{
750 if (format->type == 'n')
751 return _format_float('f', value, format, snprintf_double);
752 else
753 return _format_float(format->type, value, format, PyOS_ascii_formatd);
754}
Eric Smith8fd3eba2008-02-17 19:48:00 +0000755#endif /* FORMAT_FLOAT */
Eric Smith8c663262007-08-25 02:26:07 +0000756
757/************************************************************************/
758/*********** built in formatters ****************************************/
759/************************************************************************/
Eric Smith8fd3eba2008-02-17 19:48:00 +0000760#ifdef FORMAT_STRING
Eric Smith8c663262007-08-25 02:26:07 +0000761PyObject *
762FORMAT_STRING(PyObject* value, PyObject* args)
763{
764 PyObject *format_spec;
Eric Smith8c663262007-08-25 02:26:07 +0000765 PyObject *result = NULL;
Eric Smith8fd3eba2008-02-17 19:48:00 +0000766#if PY_VERSION_HEX < 0x03000000
767 PyObject *tmp = NULL;
768#endif
Eric Smith8c663262007-08-25 02:26:07 +0000769 InternalFormatSpec format;
770
Eric Smith8fd3eba2008-02-17 19:48:00 +0000771 /* If 2.x, we accept either str or unicode, and try to convert it
772 to the right type. In 3.x, we insist on only unicode */
773#if PY_VERSION_HEX >= 0x03000000
774 if (!PyArg_ParseTuple(args, STRINGLIB_PARSE_CODE ":__format__",
775 &format_spec))
Eric Smith8c663262007-08-25 02:26:07 +0000776 goto done;
Eric Smith8fd3eba2008-02-17 19:48:00 +0000777#else
778 /* If 2.x, convert format_spec to the same type as value */
779 /* This is to allow things like u''.format('') */
780 if (!PyArg_ParseTuple(args, "O:__format__", &format_spec))
781 goto done;
782 if (!(PyString_Check(format_spec) || PyUnicode_Check(format_spec))) {
783 PyErr_Format(PyExc_TypeError, "__format__ arg must be str "
784 "or unicode, not %s", Py_TYPE(format_spec)->tp_name);
785 goto done;
786 }
787 tmp = STRINGLIB_TOSTR(format_spec);
788 if (tmp == NULL)
789 goto done;
790 format_spec = tmp;
791#endif
Eric Smith8c663262007-08-25 02:26:07 +0000792
793 /* check for the special case of zero length format spec, make
794 it equivalent to str(value) */
795 if (STRINGLIB_LEN(format_spec) == 0) {
796 result = STRINGLIB_TOSTR(value);
797 goto done;
798 }
799
Eric Smith8fd3eba2008-02-17 19:48:00 +0000800
Eric Smith8c663262007-08-25 02:26:07 +0000801 /* parse the format_spec */
802 if (!parse_internal_render_format_spec(format_spec, &format, 's'))
803 goto done;
804
805 /* type conversion? */
806 switch (format.type) {
807 case 's':
808 /* no type conversion needed, already a string. do the formatting */
809 result = format_string_internal(value, &format);
810 break;
Eric Smith8c663262007-08-25 02:26:07 +0000811 default:
812 /* unknown */
813 PyErr_Format(PyExc_ValueError, "Unknown conversion type %c",
814 format.type);
815 goto done;
816 }
817
818done:
Eric Smith8fd3eba2008-02-17 19:48:00 +0000819#if PY_VERSION_HEX < 0x03000000
820 Py_XDECREF(tmp);
821#endif
Eric Smith8c663262007-08-25 02:26:07 +0000822 return result;
823}
Eric Smith8fd3eba2008-02-17 19:48:00 +0000824#endif /* FORMAT_STRING */
Eric Smith8c663262007-08-25 02:26:07 +0000825
Eric Smith8fd3eba2008-02-17 19:48:00 +0000826#if defined FORMAT_LONG || defined FORMAT_INT
827static PyObject*
828format_int_or_long(PyObject* value, PyObject* args, IntOrLongToString tostring)
Eric Smith8c663262007-08-25 02:26:07 +0000829{
830 PyObject *format_spec;
831 PyObject *result = NULL;
832 PyObject *tmp = NULL;
833 InternalFormatSpec format;
834
Eric Smith8fd3eba2008-02-17 19:48:00 +0000835 if (!PyArg_ParseTuple(args, STRINGLIB_PARSE_CODE ":__format__",
836 &format_spec))
Eric Smith8c663262007-08-25 02:26:07 +0000837 goto done;
Eric Smith8c663262007-08-25 02:26:07 +0000838
839 /* check for the special case of zero length format spec, make
840 it equivalent to str(value) */
841 if (STRINGLIB_LEN(format_spec) == 0) {
842 result = STRINGLIB_TOSTR(value);
843 goto done;
844 }
845
846 /* parse the format_spec */
847 if (!parse_internal_render_format_spec(format_spec, &format, 'd'))
848 goto done;
849
850 /* type conversion? */
851 switch (format.type) {
Eric Smith8c663262007-08-25 02:26:07 +0000852 case 'b':
853 case 'c':
854 case 'd':
855 case 'o':
856 case 'x':
857 case 'X':
Eric Smith8fd3eba2008-02-17 19:48:00 +0000858 /* no type conversion needed, already an int (or long). do
859 the formatting */
860 result = format_int_or_long_internal(value, &format, tostring);
Eric Smith8c663262007-08-25 02:26:07 +0000861 break;
862
Eric Smithfa767ef2008-01-28 10:59:27 +0000863 case 'e':
864 case 'E':
865 case 'f':
866 case 'F':
867 case 'g':
868 case 'G':
869 case 'n':
870 case '%':
871 /* convert to float */
872 tmp = PyNumber_Float(value);
873 if (tmp == NULL)
874 goto done;
875 result = format_float_internal(value, &format);
876 break;
877
Eric Smith8c663262007-08-25 02:26:07 +0000878 default:
879 /* unknown */
880 PyErr_Format(PyExc_ValueError, "Unknown conversion type %c",
881 format.type);
882 goto done;
883 }
884
885done:
886 Py_XDECREF(tmp);
887 return result;
888}
Eric Smith8fd3eba2008-02-17 19:48:00 +0000889#endif /* FORMAT_LONG || defined FORMAT_INT */
Eric Smith8c663262007-08-25 02:26:07 +0000890
Eric Smith8fd3eba2008-02-17 19:48:00 +0000891#ifdef FORMAT_LONG
892/* Need to define long_format as a function that will convert a long
893 to a string. In 3.0, _PyLong_Format has the correct signature. In
894 2.x, we need to fudge a few parameters */
895#if PY_VERSION_HEX >= 0x03000000
896#define long_format _PyLong_Format
897#else
898static PyObject*
899long_format(PyObject* value, int base)
900{
901 /* Convert to base, don't add trailing 'L', and use the new octal
902 format. We already know this is a long object */
903 assert(PyLong_Check(value));
904 /* convert to base, don't add 'L', and use the new octal format */
905 return _PyLong_Format(value, base, 0, 1);
906}
907#endif
908
909PyObject *
910FORMAT_LONG(PyObject* value, PyObject* args)
911{
912 return format_int_or_long(value, args, long_format);
913}
914#endif /* FORMAT_LONG */
915
916#ifdef FORMAT_INT
917/* this is only used for 2.x, not 3.0 */
918static PyObject*
919int_format(PyObject* value, int base)
920{
921 /* Convert to base, and use the new octal format. We already
922 know this is an int object */
923 assert(PyInt_Check(value));
924 return _PyInt_Format((PyIntObject*)value, base, 1);
925}
926
927PyObject *
928FORMAT_INT(PyObject* value, PyObject* args)
929{
930 return format_int_or_long(value, args, int_format);
931}
932#endif /* FORMAT_INT */
933
934#ifdef FORMAT_FLOAT
Eric Smith8c663262007-08-25 02:26:07 +0000935PyObject *
936FORMAT_FLOAT(PyObject *value, PyObject *args)
937{
938 PyObject *format_spec;
939 PyObject *result = NULL;
Eric Smith8c663262007-08-25 02:26:07 +0000940 InternalFormatSpec format;
941
Eric Smith37f10382007-09-01 10:56:01 +0000942 if (!PyArg_ParseTuple(args, STRINGLIB_PARSE_CODE ":__format__", &format_spec))
Eric Smith8c663262007-08-25 02:26:07 +0000943 goto done;
Eric Smith8c663262007-08-25 02:26:07 +0000944
945 /* check for the special case of zero length format spec, make
946 it equivalent to str(value) */
947 if (STRINGLIB_LEN(format_spec) == 0) {
948 result = STRINGLIB_TOSTR(value);
949 goto done;
950 }
951
952 /* parse the format_spec */
953 if (!parse_internal_render_format_spec(format_spec, &format, 'g'))
954 goto done;
955
956 /* type conversion? */
957 switch (format.type) {
Eric Smith8c663262007-08-25 02:26:07 +0000958 case 'e':
959 case 'E':
960 case 'f':
961 case 'F':
962 case 'g':
963 case 'G':
964 case 'n':
965 case '%':
966 /* no conversion, already a float. do the formatting */
967 result = format_float_internal(value, &format);
968 break;
969
970 default:
971 /* unknown */
972 PyErr_Format(PyExc_ValueError, "Unknown conversion type %c",
973 format.type);
974 goto done;
975 }
976
977done:
Eric Smith8c663262007-08-25 02:26:07 +0000978 return result;
979}
Eric Smith8fd3eba2008-02-17 19:48:00 +0000980#endif /* FORMAT_FLOAT */