/* implements the string, long, and float formatters.  that is,
   string.__format__, etc. */

/* Before including this, you must include either:
   stringlib/unicodedefs.h
   stringlib/stringdefs.h

   Also, you should define the names:
   FORMAT_STRING
   FORMAT_LONG
   FORMAT_INT
   FORMAT_FLOAT
   to be whatever you want the public names of these functions to
   be.  Each function is only compiled if its name is defined.  These
   are the only non-static functions defined here.
*/

/* Set to a non-zero value to accept '(' as a sign element, in which
   case negative numbers are wrapped in parentheses.  Disabled. */
#define ALLOW_PARENS_FOR_SIGN 0

Eric Smith8c663262007-08-25 02:26:07 +000018/*
19 get_integer consumes 0 or more decimal digit characters from an
20 input string, updates *result with the corresponding positive
21 integer, and returns the number of digits consumed.
22
23 returns -1 on error.
24*/
25static int
26get_integer(STRINGLIB_CHAR **ptr, STRINGLIB_CHAR *end,
27 Py_ssize_t *result)
28{
29 Py_ssize_t accumulator, digitval, oldaccumulator;
30 int numdigits;
31 accumulator = numdigits = 0;
32 for (;;(*ptr)++, numdigits++) {
33 if (*ptr >= end)
34 break;
35 digitval = STRINGLIB_TODECIMAL(**ptr);
36 if (digitval < 0)
37 break;
38 /*
39 This trick was copied from old Unicode format code. It's cute,
40 but would really suck on an old machine with a slow divide
41 implementation. Fortunately, in the normal case we do not
42 expect too many digits.
43 */
44 oldaccumulator = accumulator;
45 accumulator *= 10;
46 if ((accumulator+10)/10 != oldaccumulator+1) {
47 PyErr_Format(PyExc_ValueError,
48 "Too many decimal digits in format string");
49 return -1;
50 }
51 accumulator += digitval;
52 }
53 *result = accumulator;
54 return numdigits;
55}
56
57/************************************************************************/
58/*********** standard format specifier parsing **************************/
59/************************************************************************/
60
61/* returns true if this character is a specifier alignment token */
62Py_LOCAL_INLINE(int)
63is_alignment_token(STRINGLIB_CHAR c)
64{
65 switch (c) {
66 case '<': case '>': case '=': case '^':
67 return 1;
68 default:
69 return 0;
70 }
71}
72
73/* returns true if this character is a sign element */
74Py_LOCAL_INLINE(int)
75is_sign_element(STRINGLIB_CHAR c)
76{
77 switch (c) {
Eric Smithb7f5ba12007-08-29 12:38:45 +000078 case ' ': case '+': case '-':
Eric Smith44300952007-08-29 12:43:12 +000079#if ALLOW_PARENS_FOR_SIGN
Eric Smithb7f5ba12007-08-29 12:38:45 +000080 case '(':
Eric Smith44300952007-08-29 12:43:12 +000081#endif
Eric Smith8c663262007-08-25 02:26:07 +000082 return 1;
83 default:
84 return 0;
85 }
86}
87
88
89typedef struct {
90 STRINGLIB_CHAR fill_char;
91 STRINGLIB_CHAR align;
92 STRINGLIB_CHAR sign;
93 Py_ssize_t width;
94 Py_ssize_t precision;
95 STRINGLIB_CHAR type;
96} InternalFormatSpec;
97
98/*
99 ptr points to the start of the format_spec, end points just past its end.
100 fills in format with the parsed information.
101 returns 1 on success, 0 on failure.
102 if failure, sets the exception
103*/
104static int
105parse_internal_render_format_spec(PyObject *format_spec,
106 InternalFormatSpec *format,
107 char default_type)
108{
109 STRINGLIB_CHAR *ptr = STRINGLIB_STR(format_spec);
110 STRINGLIB_CHAR *end = ptr + STRINGLIB_LEN(format_spec);
111
112 /* end-ptr is used throughout this code to specify the length of
113 the input string */
114
115 Py_ssize_t specified_width;
116
117 format->fill_char = '\0';
118 format->align = '\0';
119 format->sign = '\0';
120 format->width = -1;
121 format->precision = -1;
122 format->type = default_type;
123
124 /* If the second char is an alignment token,
125 then parse the fill char */
126 if (end-ptr >= 2 && is_alignment_token(ptr[1])) {
127 format->align = ptr[1];
128 format->fill_char = ptr[0];
129 ptr += 2;
Eric Smith0cb431c2007-08-28 01:07:27 +0000130 }
131 else if (end-ptr >= 1 && is_alignment_token(ptr[0])) {
Eric Smith8c663262007-08-25 02:26:07 +0000132 format->align = ptr[0];
Christian Heimesc3f30c42008-02-22 16:37:40 +0000133 ++ptr;
Eric Smith8c663262007-08-25 02:26:07 +0000134 }
135
136 /* Parse the various sign options */
137 if (end-ptr >= 1 && is_sign_element(ptr[0])) {
138 format->sign = ptr[0];
Christian Heimesc3f30c42008-02-22 16:37:40 +0000139 ++ptr;
Eric Smithb7f5ba12007-08-29 12:38:45 +0000140#if ALLOW_PARENS_FOR_SIGN
Eric Smith8c663262007-08-25 02:26:07 +0000141 if (end-ptr >= 1 && ptr[0] == ')') {
Christian Heimesc3f30c42008-02-22 16:37:40 +0000142 ++ptr;
Eric Smith8c663262007-08-25 02:26:07 +0000143 }
Eric Smithb7f5ba12007-08-29 12:38:45 +0000144#endif
Eric Smith8c663262007-08-25 02:26:07 +0000145 }
146
147 /* The special case for 0-padding (backwards compat) */
Eric Smith185e30c2007-08-30 22:23:08 +0000148 if (format->fill_char == '\0' && end-ptr >= 1 && ptr[0] == '0') {
Eric Smith8c663262007-08-25 02:26:07 +0000149 format->fill_char = '0';
150 if (format->align == '\0') {
151 format->align = '=';
152 }
Christian Heimesc3f30c42008-02-22 16:37:40 +0000153 ++ptr;
Eric Smith8c663262007-08-25 02:26:07 +0000154 }
155
156 /* XXX add error checking */
157 specified_width = get_integer(&ptr, end, &format->width);
158
159 /* if specified_width is 0, we didn't consume any characters for
160 the width. in that case, reset the width to -1, because
161 get_integer() will have set it to zero */
162 if (specified_width == 0) {
163 format->width = -1;
164 }
165
166 /* Parse field precision */
167 if (end-ptr && ptr[0] == '.') {
Christian Heimesc3f30c42008-02-22 16:37:40 +0000168 ++ptr;
Eric Smith8c663262007-08-25 02:26:07 +0000169
170 /* XXX add error checking */
171 specified_width = get_integer(&ptr, end, &format->precision);
172
173 /* not having a precision after a dot is an error */
174 if (specified_width == 0) {
175 PyErr_Format(PyExc_ValueError,
176 "Format specifier missing precision");
177 return 0;
178 }
179
180 }
181
182 /* Finally, parse the type field */
183
184 if (end-ptr > 1) {
185 /* invalid conversion spec */
186 PyErr_Format(PyExc_ValueError, "Invalid conversion specification");
187 return 0;
188 }
189
190 if (end-ptr == 1) {
191 format->type = ptr[0];
Christian Heimesc3f30c42008-02-22 16:37:40 +0000192 ++ptr;
Eric Smith8c663262007-08-25 02:26:07 +0000193 }
194
195 return 1;
196}
197
/* These helpers are needed by the float formatter and by
   format_int_or_long_internal(); the latter is compiled when either
   FORMAT_LONG or FORMAT_INT is defined, so FORMAT_INT has to enable
   them as well. */
#if defined FORMAT_FLOAT || defined FORMAT_LONG || defined FORMAT_INT
/************************************************************************/
/*********** common routines for numeric formatting *********************/
/************************************************************************/

/* describes the layout for a number, see the comment in
   calc_number_widths() for details */
typedef struct {
    Py_ssize_t n_lpadding;
    Py_ssize_t n_spadding;
    Py_ssize_t n_rpadding;
    char lsign;
    Py_ssize_t n_lsign;
    char rsign;
    Py_ssize_t n_rsign;
    Py_ssize_t n_total; /* just a convenience, it's derivable from the
                           other fields */
} NumberFieldWidths;

/* Compute the layout of a formatted number and store it in *r.

   actual_sign is '-' for a negative number; any other value is
   treated as non-negative.  n_digits is the number of characters the
   number's digits occupy, excluding any sign.

   not all fields of format are used.  for example, precision is
   unused.  should this take discrete params in order to be more clear
   about what it does?  or is passing a single format parameter easier
   and more efficient enough to justify a little obfuscation? */
static void
calc_number_widths(NumberFieldWidths *r, STRINGLIB_CHAR actual_sign,
                   Py_ssize_t n_digits, const InternalFormatSpec *format)
{
    r->n_lpadding = 0;
    r->n_spadding = 0;
    r->n_rpadding = 0;
    r->lsign = '\0';
    r->n_lsign = 0;
    r->rsign = '\0';
    r->n_rsign = 0;

    /* the output will look like:
       |                                                                    |
       | <lpadding> <lsign> <spadding> <digits> <rsign> <rpadding>          |
       |                                                                    |

       lsign and rsign are computed from format->sign and the actual
       sign of the number

       digits is already known

       the total width is either given, or computed from the
       actual digits

       only one of lpadding, spadding, and rpadding can be non-zero
       (except for '^' alignment, which splits the padding between
       lpadding and rpadding), and it's calculated from the width and
       other fields
    */

    /* compute the various parts we're going to write */
    if (format->sign == '+') {
        /* always put a + or - */
        r->n_lsign = 1;
        r->lsign = (actual_sign == '-' ? '-' : '+');
    }
#if ALLOW_PARENS_FOR_SIGN
    else if (format->sign == '(') {
        if (actual_sign == '-') {
            r->n_lsign = 1;
            r->lsign = '(';
            r->n_rsign = 1;
            r->rsign = ')';
        }
    }
#endif
    else if (format->sign == ' ') {
        r->n_lsign = 1;
        r->lsign = (actual_sign == '-' ? '-' : ' ');
    }
    else {
        /* non specified, or the default (-) */
        if (actual_sign == '-') {
            r->n_lsign = 1;
            r->lsign = '-';
        }
    }

    /* now the number of padding characters */
    if (format->width == -1) {
        /* no padding at all, nothing to do */
    }
    else {
        /* see if any padding is needed */
        if (r->n_lsign + n_digits + r->n_rsign >= format->width) {
            /* no padding needed, we're already bigger than the
               requested width */
        }
        else {
            /* determine which of left, space, or right padding is
               needed */
            Py_ssize_t padding = format->width -
                                 (r->n_lsign + n_digits + r->n_rsign);
            if (format->align == '<')
                r->n_rpadding = padding;
            else if (format->align == '>')
                r->n_lpadding = padding;
            else if (format->align == '^') {
                r->n_lpadding = padding / 2;
                r->n_rpadding = padding - r->n_lpadding;
            }
            else if (format->align == '=')
                r->n_spadding = padding;
            else
                /* no alignment given: right-align, like '>' */
                r->n_lpadding = padding;
        }
    }
    r->n_total = r->n_lpadding + r->n_lsign + r->n_spadding +
        n_digits + r->n_rsign + r->n_rpadding;
}

/* fill in the non-digit parts of a number's string representation,
   as determined in calc_number_widths().  p_buf must have room for
   spec->n_total characters.  returns the pointer to where the digits
   go; the n_digits characters there are not written. */
static STRINGLIB_CHAR *
fill_number(STRINGLIB_CHAR *p_buf, const NumberFieldWidths *spec,
            Py_ssize_t n_digits, STRINGLIB_CHAR fill_char)
{
    STRINGLIB_CHAR* p_digits;

    if (spec->n_lpadding) {
        STRINGLIB_FILL(p_buf, fill_char, spec->n_lpadding);
        p_buf += spec->n_lpadding;
    }
    if (spec->n_lsign == 1) {
        *p_buf++ = spec->lsign;
    }
    if (spec->n_spadding) {
        STRINGLIB_FILL(p_buf, fill_char, spec->n_spadding);
        p_buf += spec->n_spadding;
    }
    p_digits = p_buf;
    p_buf += n_digits;
    if (spec->n_rsign == 1) {
        *p_buf++ = spec->rsign;
    }
    if (spec->n_rpadding) {
        STRINGLIB_FILL(p_buf, fill_char, spec->n_rpadding);
        p_buf += spec->n_rpadding;
    }
    return p_digits;
}
#endif /* FORMAT_FLOAT || FORMAT_LONG || FORMAT_INT */

344/************************************************************************/
345/*********** string formatting ******************************************/
346/************************************************************************/
347
348static PyObject *
349format_string_internal(PyObject *value, const InternalFormatSpec *format)
350{
351 Py_ssize_t width; /* total field width */
352 Py_ssize_t lpad;
353 STRINGLIB_CHAR *dst;
354 STRINGLIB_CHAR *src = STRINGLIB_STR(value);
355 Py_ssize_t len = STRINGLIB_LEN(value);
356 PyObject *result = NULL;
357
358 /* sign is not allowed on strings */
359 if (format->sign != '\0') {
360 PyErr_SetString(PyExc_ValueError,
361 "Sign not allowed in string format specifier");
362 goto done;
363 }
364
365 /* '=' alignment not allowed on strings */
366 if (format->align == '=') {
367 PyErr_SetString(PyExc_ValueError,
368 "'=' alignment not allowed "
369 "in string format specifier");
370 goto done;
371 }
372
373 /* if precision is specified, output no more that format.precision
374 characters */
375 if (format->precision >= 0 && len >= format->precision) {
376 len = format->precision;
377 }
378
379 if (format->width >= 0) {
380 width = format->width;
381
382 /* but use at least len characters */
383 if (len > width) {
384 width = len;
385 }
Eric Smith0cb431c2007-08-28 01:07:27 +0000386 }
387 else {
Eric Smith8c663262007-08-25 02:26:07 +0000388 /* not specified, use all of the chars and no more */
389 width = len;
390 }
391
392 /* allocate the resulting string */
393 result = STRINGLIB_NEW(NULL, width);
394 if (result == NULL)
395 goto done;
396
397 /* now write into that space */
398 dst = STRINGLIB_STR(result);
399
400 /* figure out how much leading space we need, based on the
401 aligning */
402 if (format->align == '>')
403 lpad = width - len;
404 else if (format->align == '^')
405 lpad = (width - len) / 2;
406 else
407 lpad = 0;
408
409 /* if right aligning, increment the destination allow space on the
410 left */
411 memcpy(dst + lpad, src, len * sizeof(STRINGLIB_CHAR));
412
413 /* do any padding */
414 if (width > len) {
415 STRINGLIB_CHAR fill_char = format->fill_char;
416 if (fill_char == '\0') {
417 /* use the default, if not specified */
418 fill_char = ' ';
419 }
420
421 /* pad on left */
422 if (lpad)
423 STRINGLIB_FILL(dst, fill_char, lpad);
424
425 /* pad on right */
426 if (width - len - lpad)
427 STRINGLIB_FILL(dst + len + lpad, fill_char, width - len - lpad);
428 }
429
430done:
431 return result;
432}
433
434
/************************************************************************/
/*********** long formatting ********************************************/
/************************************************************************/

#if defined FORMAT_LONG || defined FORMAT_INT
/* Signature of the helper that renders an int or long object as a
   string object in the given base. */
typedef PyObject*
(*IntOrLongToString)(PyObject *value, int base);

/* Format the integer object value according to *format and return
   the result as a new string object.  tostring does the conversion of
   value to digits in a given base.

   A precision is rejected with ValueError.  Type 'c' emits the single
   character whose code is value; the other types emit value in base
   2 ('b'), 8 ('o'), 16 ('x', 'X') or 10 (anything else), without the
   base prefix that tostring produces.

   Returns NULL with an exception set on failure. */
static PyObject *
format_int_or_long_internal(PyObject *value, const InternalFormatSpec *format,
                            IntOrLongToString tostring)
{
    PyObject *result = NULL;
    PyObject *tmp = NULL;           /* the string made by tostring */
    STRINGLIB_CHAR *digits;         /* the characters to emit */
    STRINGLIB_CHAR single_char;     /* storage for the 'c' case */
    STRINGLIB_CHAR sign = '\0';
    STRINGLIB_CHAR *out;
    Py_ssize_t n_digits;            /* count of digits need from the
                                       computed string */
    Py_ssize_t digits_offset;       /* where the digits start in out */
    NumberFieldWidths widths;
    long x;

    /* no precision allowed on integers */
    if (format->precision != -1) {
        PyErr_SetString(PyExc_ValueError,
                        "Precision not allowed in integer format specifier");
        goto done;
    }

    if (format->type == 'c') {
        /* special case for character formatting */

        /* error to specify a sign */
        if (format->sign != '\0') {
            PyErr_SetString(PyExc_ValueError,
                            "Sign not allowed with integer"
                            " format specifier 'c'");
            goto done;
        }

        /* taken from unicodeobject.c formatchar() */
        /* Integer input truncated to a character */
/* XXX: won't work for int */
        x = PyLong_AsLong(value);
        if (x == -1 && PyErr_Occurred())
            goto done;
#ifdef Py_UNICODE_WIDE
        if (x < 0 || x > 0x10ffff) {
            PyErr_SetString(PyExc_OverflowError,
                            "%c arg not in range(0x110000) "
                            "(wide Python build)");
            goto done;
        }
#else
        if (x < 0 || x > 0xffff) {
            PyErr_SetString(PyExc_OverflowError,
                            "%c arg not in range(0x10000) "
                            "(narrow Python build)");
            goto done;
        }
#endif
        single_char = (STRINGLIB_CHAR)x;
        digits = &single_char;
        n_digits = 1;
    }
    else {
        /* Work out the base, and how many characters added by
           tostring (the "0b", "0o" or "0x" prefix) we want to skip
           over.  Any type other than b, o, x and X is decimal. */
        int base = 10;
        int leading_chars_to_skip = 0;

        if (format->type == 'b')
            base = 2;
        else if (format->type == 'o')
            base = 8;
        else if (format->type == 'x' || format->type == 'X')
            base = 16;

        if (base != 10)
            leading_chars_to_skip = 2;

        /* Do the hard part, converting to a string in a given base */
        tmp = tostring(value, base);
        if (tmp == NULL)
            goto done;

        digits = STRINGLIB_STR(tmp);
        n_digits = STRINGLIB_LEN(tmp);

        /* Remember not to modify what digits points to.  it might be
           interned.  Only modify it after we copy it into a newly
           allocated output buffer. */

        /* Is a sign character present in the output?  If so, remember
           it and skip it */
        sign = digits[0];
        if (sign == '-')
            ++leading_chars_to_skip;

        /* Skip over the leading chars (0x, 0b, etc.) */
        digits += leading_chars_to_skip;
        n_digits -= leading_chars_to_skip;
    }

    /* Calculate the widths of the various leading and trailing parts */
    calc_number_widths(&widths, sign, n_digits, format);

    /* Allocate a new string to hold the result */
    result = STRINGLIB_NEW(NULL, widths.n_total);
    if (!result)
        goto done;
    out = STRINGLIB_STR(result);

    /* Fill in the digit parts */
    digits_offset = widths.n_lpadding + widths.n_lsign + widths.n_spadding;
    memmove(out + digits_offset, digits, n_digits * sizeof(STRINGLIB_CHAR));

    /* if X, convert to uppercase */
    if (format->type == 'X') {
        STRINGLIB_CHAR *cur = out + digits_offset;
        STRINGLIB_CHAR *stop = cur + n_digits;
        for (; cur < stop; ++cur)
            *cur = STRINGLIB_TOUPPER(*cur);
    }

    /* Fill in the non-digit parts */
    fill_number(out, &widths, n_digits,
                format->fill_char == '\0' ? ' ' : format->fill_char);

done:
    Py_XDECREF(tmp);
    return result;
}
#endif /* defined FORMAT_LONG || defined FORMAT_INT */

/************************************************************************/
/*********** float formatting *******************************************/
/************************************************************************/

#ifdef FORMAT_FLOAT
#if STRINGLIB_IS_UNICODE
/* taken from unicodeobject.c */
/* Widen the NUL-terminated 8-bit string charbuffer into buffer, one
   Py_UNICODE per char.  The terminating NUL is not copied.  Returns
   the number of characters written. */
static Py_ssize_t
strtounicode(Py_UNICODE *buffer, const char *charbuffer)
{
    register Py_ssize_t i;
    Py_ssize_t len = strlen(charbuffer);
    for (i = len - 1; i >= 0; --i)
        buffer[i] = (Py_UNICODE) charbuffer[i];

    return len;
}
#endif

/* see FORMATBUFLEN in unicodeobject.c */
#define FLOAT_FORMATBUFLEN 120

/* much of this is taken from unicodeobject.c */
/* Format the number value (converted with PyFloat_AsDouble) according
   to *format and return the result as a new string object.

   'F' is treated as 'f'.  '%' multiplies the number by 100, formats
   it as 'f' and appends a '%'.  A missing precision defaults to 6.

   Returns NULL with an exception set on failure; OverflowError is
   raised if the requested precision cannot fit the internal buffer. */
static PyObject *
format_float_internal(PyObject *value,
                      const InternalFormatSpec *format)
{
    /* fmt = '%.' + `prec` + `type`
       the precision is range checked below, so it never needs more
       than 3 digits and fmt is comfortably large enough */
    char fmt[20];

    /* taken from unicodeobject.c */
    /* Worst case length calc to ensure no buffer overrun:

       'g' formats:
         fmt = %#.<prec>g
         buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
            for any double rep.)
         len = 1 + prec + 1 + 2 + 5 = 9 + prec

       'f' formats:
         buf = '-' + [0-9]*x + '.' + [0-9]*prec (with x < 50)
         len = 1 + 50 + 1 + prec = 52 + prec

       If prec=0 the effective precision is 1 (the leading digit is
       always given), therefore increase the length by one.

    */
    char charbuf[FLOAT_FORMATBUFLEN];
    Py_ssize_t n_digits;
    double x;
    Py_ssize_t precision = format->precision;
    Py_ssize_t max_precision;
    PyObject *result = NULL;
    STRINGLIB_CHAR sign;
    const char *trailing = "";
    STRINGLIB_CHAR *p;
    NumberFieldWidths spec;
    STRINGLIB_CHAR type = format->type;

#if STRINGLIB_IS_UNICODE
    Py_UNICODE unicodebuf[FLOAT_FORMATBUFLEN];
#endif

    /* first, do the conversion as 8-bit chars, using the platform's
       snprintf.  then, if needed, convert to unicode. */

    /* 'F' is the same as 'f', per the PEP */
    if (type == 'F')
        type = 'f';

    x = PyFloat_AsDouble(value);

    if (x == -1.0 && PyErr_Occurred())
        goto done;

    if (type == '%') {
        type = 'f';
        x *= 100;
        trailing = "%";
    }

    if (precision < 0)
        precision = 6;
    /* 'f' output is only bounded for magnitudes below 1e50; switch
       larger values to 'g' */
    if (type == 'f' && (fabs(x) / 1e25) >= 1e25)
        type = 'g';

    /* Enforce the worst case analysis above: the formatted number,
       the optional trailing '%' and the terminating NUL must all fit
       in charbuf.  Without this check a large precision would be
       silently truncated, and appending the '%' could write past the
       end of the buffer. */
    max_precision = (Py_ssize_t)sizeof(charbuf) - 1
                    - (Py_ssize_t)strlen(trailing)
                    - (type == 'f' ? 53 : 10);
    if (precision > max_precision) {
        PyErr_SetString(PyExc_OverflowError,
                        "formatted float is too long (precision too large?)");
        goto done;
    }

    /* cast "type", because if we're in unicode we need to pass a
       8-bit char.  this is safe, because we've restricted what "type"
       can be */
    PyOS_snprintf(fmt, sizeof(fmt), "%%.%" PY_FORMAT_SIZE_T "d%c", precision,
                  (char)type);

    /* do the actual formatting.  on failure charbuf is not guaranteed
       to hold a string, so don't go on to read it */
    if (PyOS_ascii_formatd(charbuf, sizeof(charbuf), fmt, x) == NULL) {
        if (!PyErr_Occurred())
            PyErr_SetString(PyExc_ValueError,
                            "Unable to format float with the given "
                            "format specifier");
        goto done;
    }

    /* adding trailing to fmt with PyOS_snprintf doesn't work, not
       sure why.  we'll just concatenate it here, no harm done.  the
       precision check above reserved room for it */
    strcat(charbuf, trailing);

    /* rather than duplicate the code for snprintf for both unicode
       and 8 bit strings, we just use the 8 bit version and then
       convert to unicode in a separate code path.  that's probably
       the lesser of 2 evils. */
#if STRINGLIB_IS_UNICODE
    n_digits = strtounicode(unicodebuf, charbuf);
    p = unicodebuf;
#else
    /* compute the length.  I believe this is done because the return
       value from snprintf above is unreliable */
    n_digits = strlen(charbuf);
    p = charbuf;
#endif

    /* is a sign character present in the output?  if so, remember it
       and skip it */
    sign = p[0];
    if (sign == '-') {
        ++p;
        --n_digits;
    }

    calc_number_widths(&spec, sign, n_digits, format);

    /* allocate a string with enough space */
    result = STRINGLIB_NEW(NULL, spec.n_total);
    if (result == NULL)
        goto done;

    /* fill in the non-digit parts */
    fill_number(STRINGLIB_STR(result), &spec, n_digits,
                format->fill_char == '\0' ? ' ' : format->fill_char);

    /* fill in the digit parts */
    memmove(STRINGLIB_STR(result) +
                (spec.n_lpadding + spec.n_lsign + spec.n_spadding),
            p,
            n_digits * sizeof(STRINGLIB_CHAR));

done:
    return result;
}
#endif /* FORMAT_FLOAT */

/************************************************************************/
/*********** built in formatters ****************************************/
/************************************************************************/
#ifdef FORMAT_STRING
/* The __format__ implementation for string objects.  args is the
   argument tuple holding the format specifier.  An empty specifier
   gives the same result as converting value to a string; otherwise
   only the 's' conversion type is accepted.

   Returns a new string object, or NULL with an exception set. */
PyObject *
FORMAT_STRING(PyObject* value, PyObject* args)
{
    InternalFormatSpec format;
    PyObject *format_spec;
    PyObject *result = NULL;
#if PY_VERSION_HEX < 0x03000000
    PyObject *tmp = NULL;
#endif

    /* If 2.x, we accept either str or unicode, and try to convert it
       to the right type.  In 3.x, we insist on only unicode */
#if PY_VERSION_HEX >= 0x03000000
    if (!PyArg_ParseTuple(args, STRINGLIB_PARSE_CODE ":__format__",
                          &format_spec))
        goto done;
#else
    /* If 2.x, convert format_spec to the same type as value */
    /* This is to allow things like u''.format('') */
    if (!PyArg_ParseTuple(args, "O:__format__", &format_spec))
        goto done;
    if (!PyString_Check(format_spec) && !PyUnicode_Check(format_spec)) {
        PyErr_Format(PyExc_TypeError, "__format__ arg must be str "
                     "or unicode, not %s", Py_TYPE(format_spec)->tp_name);
        goto done;
    }
    tmp = STRINGLIB_TOSTR(format_spec);
    if (tmp == NULL)
        goto done;
    format_spec = tmp;
#endif

    if (STRINGLIB_LEN(format_spec) == 0) {
        /* the special case of zero length format spec is equivalent
           to str(value) */
        result = STRINGLIB_TOSTR(value);
        goto done;
    }

    /* parse the format_spec */
    if (!parse_internal_render_format_spec(format_spec, &format, 's'))
        goto done;

    /* type conversion? */
    if (format.type == 's') {
        /* no type conversion needed, already a string.  do the
           formatting */
        result = format_string_internal(value, &format);
    }
    else {
        /* unknown */
        PyErr_Format(PyExc_ValueError, "Unknown conversion type %c",
                     format.type);
    }

done:
#if PY_VERSION_HEX < 0x03000000
    Py_XDECREF(tmp);
#endif
    return result;
}
#endif /* FORMAT_STRING */

#if defined FORMAT_LONG || defined FORMAT_INT
/* Shared __format__ implementation for int and long objects.  args
   is the argument tuple holding the format specifier, and tostring
   converts value to a string in a given base.

   An empty specifier gives the same result as converting value to a
   string.  The integer types (b, c, d, o, x, X) are formatted
   directly; the float types (e, E, f, F, g, G, n, %) first convert
   value to a float and format that.

   Returns a new string object, or NULL with an exception set. */
static PyObject*
format_int_or_long(PyObject* value, PyObject* args, IntOrLongToString tostring)
{
    PyObject *format_spec;
    PyObject *result = NULL;
    PyObject *tmp = NULL;       /* value converted to a float, if needed */
    InternalFormatSpec format;

    if (!PyArg_ParseTuple(args, STRINGLIB_PARSE_CODE ":__format__",
                          &format_spec))
        goto done;

    /* check for the special case of zero length format spec, make
       it equivalent to str(value) */
    if (STRINGLIB_LEN(format_spec) == 0) {
        result = STRINGLIB_TOSTR(value);
        goto done;
    }

    /* parse the format_spec */
    if (!parse_internal_render_format_spec(format_spec, &format, 'd'))
        goto done;

    /* type conversion? */
    switch (format.type) {
    case 'b':
    case 'c':
    case 'd':
    case 'o':
    case 'x':
    case 'X':
        /* no type conversion needed, already an int (or long).  do
           the formatting */
        result = format_int_or_long_internal(value, &format, tostring);
        break;

    case 'e':
    case 'E':
    case 'f':
    case 'F':
    case 'g':
    case 'G':
    case 'n':
    case '%':
        /* convert to float */
        tmp = PyNumber_Float(value);
        if (tmp == NULL)
            goto done;
        /* format the converted float, not the original integer */
        result = format_float_internal(tmp, &format);
        break;

    default:
        /* unknown */
        PyErr_Format(PyExc_ValueError, "Unknown conversion type %c",
                     format.type);
        goto done;
    }

done:
    Py_XDECREF(tmp);
    return result;
}
#endif /* FORMAT_LONG || defined FORMAT_INT */

#ifdef FORMAT_LONG
/* long_format(value, base) must convert a long to a string in the
   given base.  In 3.0, _PyLong_Format already has that signature.  In
   2.x it takes two more parameters, which are supplied here. */
#if PY_VERSION_HEX >= 0x03000000
#define long_format _PyLong_Format
#else
static PyObject*
long_format(PyObject* value, int base)
{
    const int add_trailing_L = 0;       /* don't add trailing 'L' */
    const int new_octal_format = 1;     /* use the new octal format */

    /* We already know this is a long object */
    assert(PyLong_Check(value));
    return _PyLong_Format(value, base, add_trailing_L, new_octal_format);
}
#endif

/* The public __format__ implementation for long objects. */
PyObject *
FORMAT_LONG(PyObject* value, PyObject* args)
{
    return format_int_or_long(value, args, long_format);
}
#endif /* FORMAT_LONG */

#ifdef FORMAT_INT
/* this is only used for 2.x, not 3.0 */
/* Convert an int object to a string in the given base. */
static PyObject*
int_format(PyObject* value, int base)
{
    const int new_octal_format = 1;     /* use the new octal format */

    /* We already know this is an int object */
    assert(PyInt_Check(value));
    return _PyInt_Format((PyIntObject*)value, base, new_octal_format);
}

/* The public __format__ implementation for int objects. */
PyObject *
FORMAT_INT(PyObject* value, PyObject* args)
{
    return format_int_or_long(value, args, int_format);
}
#endif /* FORMAT_INT */

#ifdef FORMAT_FLOAT
/* The public __format__ implementation for float objects.  args is
   the argument tuple holding the format specifier.  An empty
   specifier gives the same result as converting value to a string;
   otherwise the type must be one of e, E, f, F, g, G, n or %, and
   defaults to 'g'.

   Returns a new string object, or NULL with an exception set. */
PyObject *
FORMAT_FLOAT(PyObject *value, PyObject *args)
{
    InternalFormatSpec format;
    PyObject *format_spec;

    if (!PyArg_ParseTuple(args, STRINGLIB_PARSE_CODE ":__format__", &format_spec))
        return NULL;

    /* the special case of zero length format spec is equivalent to
       str(value) */
    if (STRINGLIB_LEN(format_spec) == 0)
        return STRINGLIB_TOSTR(value);

    /* parse the format_spec */
    if (!parse_internal_render_format_spec(format_spec, &format, 'g'))
        return NULL;

    /* type conversion? */
    switch (format.type) {
    case 'e':
    case 'E':
    case 'f':
    case 'F':
    case 'g':
    case 'G':
    case 'n':
    case '%':
        /* no conversion, already a float.  do the formatting */
        return format_float_internal(value, &format);

    default:
        /* unknown */
        PyErr_Format(PyExc_ValueError, "Unknown conversion type %c",
                     format.type);
        return NULL;
    }
}
#endif /* FORMAT_FLOAT */
Eric Smith8fd3eba2008-02-17 19:48:00 +0000955#endif /* FORMAT_FLOAT */