blob: 2bb2ed2ca894a3ba5073b8283e13cb06d0d82ebf [file] [log] [blame]
/* implements the string, int, long, and float formatters. that is,
   string.__format__, etc. */

/* Before including this, you must include either:
   stringlib/unicodedefs.h
   stringlib/stringdefs.h

   Also, you should define the names:
   FORMAT_STRING
   FORMAT_LONG
   FORMAT_INT (only used for 2.x)
   FORMAT_FLOAT
   to be whatever you want the public names of these functions to
   be. These are the only non-static functions defined here.
   FORMAT_LONG, FORMAT_INT and FORMAT_FLOAT are optional: the code for
   each is only compiled when the corresponding name is defined.
*/
15
Eric Smithb7f5ba12007-08-29 12:38:45 +000016#define ALLOW_PARENS_FOR_SIGN 0
17
Eric Smith8c663262007-08-25 02:26:07 +000018/*
19 get_integer consumes 0 or more decimal digit characters from an
20 input string, updates *result with the corresponding positive
21 integer, and returns the number of digits consumed.
22
23 returns -1 on error.
24*/
25static int
26get_integer(STRINGLIB_CHAR **ptr, STRINGLIB_CHAR *end,
27 Py_ssize_t *result)
28{
29 Py_ssize_t accumulator, digitval, oldaccumulator;
30 int numdigits;
31 accumulator = numdigits = 0;
32 for (;;(*ptr)++, numdigits++) {
33 if (*ptr >= end)
34 break;
35 digitval = STRINGLIB_TODECIMAL(**ptr);
36 if (digitval < 0)
37 break;
38 /*
39 This trick was copied from old Unicode format code. It's cute,
40 but would really suck on an old machine with a slow divide
41 implementation. Fortunately, in the normal case we do not
42 expect too many digits.
43 */
44 oldaccumulator = accumulator;
45 accumulator *= 10;
46 if ((accumulator+10)/10 != oldaccumulator+1) {
47 PyErr_Format(PyExc_ValueError,
48 "Too many decimal digits in format string");
49 return -1;
50 }
51 accumulator += digitval;
52 }
53 *result = accumulator;
54 return numdigits;
55}
56
57/************************************************************************/
58/*********** standard format specifier parsing **************************/
59/************************************************************************/
60
61/* returns true if this character is a specifier alignment token */
62Py_LOCAL_INLINE(int)
63is_alignment_token(STRINGLIB_CHAR c)
64{
65 switch (c) {
66 case '<': case '>': case '=': case '^':
67 return 1;
68 default:
69 return 0;
70 }
71}
72
73/* returns true if this character is a sign element */
74Py_LOCAL_INLINE(int)
75is_sign_element(STRINGLIB_CHAR c)
76{
77 switch (c) {
Eric Smithb7f5ba12007-08-29 12:38:45 +000078 case ' ': case '+': case '-':
Eric Smith44300952007-08-29 12:43:12 +000079#if ALLOW_PARENS_FOR_SIGN
Eric Smithb7f5ba12007-08-29 12:38:45 +000080 case '(':
Eric Smith44300952007-08-29 12:43:12 +000081#endif
Eric Smith8c663262007-08-25 02:26:07 +000082 return 1;
83 default:
84 return 0;
85 }
86}
87
88
89typedef struct {
90 STRINGLIB_CHAR fill_char;
91 STRINGLIB_CHAR align;
92 STRINGLIB_CHAR sign;
93 Py_ssize_t width;
94 Py_ssize_t precision;
95 STRINGLIB_CHAR type;
96} InternalFormatSpec;
97
98/*
99 ptr points to the start of the format_spec, end points just past its end.
100 fills in format with the parsed information.
101 returns 1 on success, 0 on failure.
102 if failure, sets the exception
103*/
104static int
Eric Smith4a7d76d2008-05-30 18:10:19 +0000105parse_internal_render_format_spec(STRINGLIB_CHAR *format_spec,
106 Py_ssize_t format_spec_len,
Eric Smith8c663262007-08-25 02:26:07 +0000107 InternalFormatSpec *format,
108 char default_type)
109{
Eric Smith4a7d76d2008-05-30 18:10:19 +0000110 STRINGLIB_CHAR *ptr = format_spec;
111 STRINGLIB_CHAR *end = format_spec + format_spec_len;
Eric Smith8c663262007-08-25 02:26:07 +0000112
113 /* end-ptr is used throughout this code to specify the length of
114 the input string */
115
116 Py_ssize_t specified_width;
117
118 format->fill_char = '\0';
119 format->align = '\0';
120 format->sign = '\0';
121 format->width = -1;
122 format->precision = -1;
123 format->type = default_type;
124
125 /* If the second char is an alignment token,
126 then parse the fill char */
127 if (end-ptr >= 2 && is_alignment_token(ptr[1])) {
128 format->align = ptr[1];
129 format->fill_char = ptr[0];
130 ptr += 2;
Eric Smith0cb431c2007-08-28 01:07:27 +0000131 }
132 else if (end-ptr >= 1 && is_alignment_token(ptr[0])) {
Eric Smith8c663262007-08-25 02:26:07 +0000133 format->align = ptr[0];
Christian Heimesc3f30c42008-02-22 16:37:40 +0000134 ++ptr;
Eric Smith8c663262007-08-25 02:26:07 +0000135 }
136
137 /* Parse the various sign options */
138 if (end-ptr >= 1 && is_sign_element(ptr[0])) {
139 format->sign = ptr[0];
Christian Heimesc3f30c42008-02-22 16:37:40 +0000140 ++ptr;
Eric Smithb7f5ba12007-08-29 12:38:45 +0000141#if ALLOW_PARENS_FOR_SIGN
Eric Smith8c663262007-08-25 02:26:07 +0000142 if (end-ptr >= 1 && ptr[0] == ')') {
Christian Heimesc3f30c42008-02-22 16:37:40 +0000143 ++ptr;
Eric Smith8c663262007-08-25 02:26:07 +0000144 }
Eric Smithb7f5ba12007-08-29 12:38:45 +0000145#endif
Eric Smith8c663262007-08-25 02:26:07 +0000146 }
147
148 /* The special case for 0-padding (backwards compat) */
Eric Smith185e30c2007-08-30 22:23:08 +0000149 if (format->fill_char == '\0' && end-ptr >= 1 && ptr[0] == '0') {
Eric Smith8c663262007-08-25 02:26:07 +0000150 format->fill_char = '0';
151 if (format->align == '\0') {
152 format->align = '=';
153 }
Christian Heimesc3f30c42008-02-22 16:37:40 +0000154 ++ptr;
Eric Smith8c663262007-08-25 02:26:07 +0000155 }
156
157 /* XXX add error checking */
158 specified_width = get_integer(&ptr, end, &format->width);
159
160 /* if specified_width is 0, we didn't consume any characters for
161 the width. in that case, reset the width to -1, because
162 get_integer() will have set it to zero */
163 if (specified_width == 0) {
164 format->width = -1;
165 }
166
167 /* Parse field precision */
168 if (end-ptr && ptr[0] == '.') {
Christian Heimesc3f30c42008-02-22 16:37:40 +0000169 ++ptr;
Eric Smith8c663262007-08-25 02:26:07 +0000170
171 /* XXX add error checking */
172 specified_width = get_integer(&ptr, end, &format->precision);
173
174 /* not having a precision after a dot is an error */
175 if (specified_width == 0) {
176 PyErr_Format(PyExc_ValueError,
177 "Format specifier missing precision");
178 return 0;
179 }
180
181 }
182
183 /* Finally, parse the type field */
184
185 if (end-ptr > 1) {
186 /* invalid conversion spec */
187 PyErr_Format(PyExc_ValueError, "Invalid conversion specification");
188 return 0;
189 }
190
191 if (end-ptr == 1) {
192 format->type = ptr[0];
Christian Heimesc3f30c42008-02-22 16:37:40 +0000193 ++ptr;
Eric Smith8c663262007-08-25 02:26:07 +0000194 }
195
196 return 1;
197}
198
Eric Smith8fd3eba2008-02-17 19:48:00 +0000199#if defined FORMAT_FLOAT || defined FORMAT_LONG
Eric Smith8c663262007-08-25 02:26:07 +0000200/************************************************************************/
201/*********** common routines for numeric formatting *********************/
202/************************************************************************/
203
204/* describes the layout for an integer, see the comment in
205 _calc_integer_widths() for details */
206typedef struct {
207 Py_ssize_t n_lpadding;
208 Py_ssize_t n_spadding;
209 Py_ssize_t n_rpadding;
210 char lsign;
211 Py_ssize_t n_lsign;
212 char rsign;
213 Py_ssize_t n_rsign;
214 Py_ssize_t n_total; /* just a convenience, it's derivable from the
215 other fields */
216} NumberFieldWidths;
217
218/* not all fields of format are used. for example, precision is
219 unused. should this take discrete params in order to be more clear
220 about what it does? or is passing a single format parameter easier
221 and more efficient enough to justify a little obfuscation? */
222static void
223calc_number_widths(NumberFieldWidths *r, STRINGLIB_CHAR actual_sign,
224 Py_ssize_t n_digits, const InternalFormatSpec *format)
225{
226 r->n_lpadding = 0;
227 r->n_spadding = 0;
228 r->n_rpadding = 0;
229 r->lsign = '\0';
230 r->n_lsign = 0;
231 r->rsign = '\0';
232 r->n_rsign = 0;
233
234 /* the output will look like:
235 | |
236 | <lpadding> <lsign> <spadding> <digits> <rsign> <rpadding> |
237 | |
238
239 lsign and rsign are computed from format->sign and the actual
240 sign of the number
241
242 digits is already known
243
244 the total width is either given, or computed from the
245 actual digits
246
247 only one of lpadding, spadding, and rpadding can be non-zero,
248 and it's calculated from the width and other fields
249 */
250
251 /* compute the various parts we're going to write */
252 if (format->sign == '+') {
253 /* always put a + or - */
254 r->n_lsign = 1;
255 r->lsign = (actual_sign == '-' ? '-' : '+');
Eric Smith0cb431c2007-08-28 01:07:27 +0000256 }
Eric Smithb7f5ba12007-08-29 12:38:45 +0000257#if ALLOW_PARENS_FOR_SIGN
Eric Smith0cb431c2007-08-28 01:07:27 +0000258 else if (format->sign == '(') {
Eric Smith8c663262007-08-25 02:26:07 +0000259 if (actual_sign == '-') {
260 r->n_lsign = 1;
261 r->lsign = '(';
262 r->n_rsign = 1;
263 r->rsign = ')';
264 }
Eric Smith0cb431c2007-08-28 01:07:27 +0000265 }
Eric Smithb7f5ba12007-08-29 12:38:45 +0000266#endif
Eric Smith0cb431c2007-08-28 01:07:27 +0000267 else if (format->sign == ' ') {
Eric Smith8c663262007-08-25 02:26:07 +0000268 r->n_lsign = 1;
269 r->lsign = (actual_sign == '-' ? '-' : ' ');
Eric Smith0cb431c2007-08-28 01:07:27 +0000270 }
271 else {
Eric Smith8c663262007-08-25 02:26:07 +0000272 /* non specified, or the default (-) */
273 if (actual_sign == '-') {
274 r->n_lsign = 1;
275 r->lsign = '-';
276 }
277 }
278
279 /* now the number of padding characters */
280 if (format->width == -1) {
281 /* no padding at all, nothing to do */
Eric Smith0cb431c2007-08-28 01:07:27 +0000282 }
283 else {
Eric Smith8c663262007-08-25 02:26:07 +0000284 /* see if any padding is needed */
285 if (r->n_lsign + n_digits + r->n_rsign >= format->width) {
286 /* no padding needed, we're already bigger than the
287 requested width */
Eric Smith0cb431c2007-08-28 01:07:27 +0000288 }
289 else {
Eric Smith8c663262007-08-25 02:26:07 +0000290 /* determine which of left, space, or right padding is
291 needed */
Eric Smith8fd3eba2008-02-17 19:48:00 +0000292 Py_ssize_t padding = format->width -
293 (r->n_lsign + n_digits + r->n_rsign);
Eric Smith8c663262007-08-25 02:26:07 +0000294 if (format->align == '<')
295 r->n_rpadding = padding;
296 else if (format->align == '>')
297 r->n_lpadding = padding;
298 else if (format->align == '^') {
299 r->n_lpadding = padding / 2;
300 r->n_rpadding = padding - r->n_lpadding;
Eric Smith0cb431c2007-08-28 01:07:27 +0000301 }
Eric Smith185e30c2007-08-30 22:23:08 +0000302 else if (format->align == '=')
Eric Smith8c663262007-08-25 02:26:07 +0000303 r->n_spadding = padding;
Eric Smith185e30c2007-08-30 22:23:08 +0000304 else
305 r->n_lpadding = padding;
Eric Smith8c663262007-08-25 02:26:07 +0000306 }
307 }
308 r->n_total = r->n_lpadding + r->n_lsign + r->n_spadding +
309 n_digits + r->n_rsign + r->n_rpadding;
310}
311
312/* fill in the non-digit parts of a numbers's string representation,
313 as determined in _calc_integer_widths(). returns the pointer to
314 where the digits go. */
315static STRINGLIB_CHAR *
316fill_number(STRINGLIB_CHAR *p_buf, const NumberFieldWidths *spec,
317 Py_ssize_t n_digits, STRINGLIB_CHAR fill_char)
318{
319 STRINGLIB_CHAR* p_digits;
320
321 if (spec->n_lpadding) {
322 STRINGLIB_FILL(p_buf, fill_char, spec->n_lpadding);
323 p_buf += spec->n_lpadding;
324 }
325 if (spec->n_lsign == 1) {
326 *p_buf++ = spec->lsign;
327 }
328 if (spec->n_spadding) {
329 STRINGLIB_FILL(p_buf, fill_char, spec->n_spadding);
330 p_buf += spec->n_spadding;
331 }
332 p_digits = p_buf;
333 p_buf += n_digits;
334 if (spec->n_rsign == 1) {
335 *p_buf++ = spec->rsign;
336 }
337 if (spec->n_rpadding) {
338 STRINGLIB_FILL(p_buf, fill_char, spec->n_rpadding);
339 p_buf += spec->n_rpadding;
340 }
341 return p_digits;
342}
Eric Smith8fd3eba2008-02-17 19:48:00 +0000343#endif /* FORMAT_FLOAT || FORMAT_LONG */
Eric Smith8c663262007-08-25 02:26:07 +0000344
345/************************************************************************/
346/*********** string formatting ******************************************/
347/************************************************************************/
348
349static PyObject *
350format_string_internal(PyObject *value, const InternalFormatSpec *format)
351{
352 Py_ssize_t width; /* total field width */
353 Py_ssize_t lpad;
354 STRINGLIB_CHAR *dst;
355 STRINGLIB_CHAR *src = STRINGLIB_STR(value);
356 Py_ssize_t len = STRINGLIB_LEN(value);
357 PyObject *result = NULL;
358
359 /* sign is not allowed on strings */
360 if (format->sign != '\0') {
361 PyErr_SetString(PyExc_ValueError,
362 "Sign not allowed in string format specifier");
363 goto done;
364 }
365
366 /* '=' alignment not allowed on strings */
367 if (format->align == '=') {
368 PyErr_SetString(PyExc_ValueError,
369 "'=' alignment not allowed "
370 "in string format specifier");
371 goto done;
372 }
373
374 /* if precision is specified, output no more that format.precision
375 characters */
376 if (format->precision >= 0 && len >= format->precision) {
377 len = format->precision;
378 }
379
380 if (format->width >= 0) {
381 width = format->width;
382
383 /* but use at least len characters */
384 if (len > width) {
385 width = len;
386 }
Eric Smith0cb431c2007-08-28 01:07:27 +0000387 }
388 else {
Eric Smith8c663262007-08-25 02:26:07 +0000389 /* not specified, use all of the chars and no more */
390 width = len;
391 }
392
393 /* allocate the resulting string */
394 result = STRINGLIB_NEW(NULL, width);
395 if (result == NULL)
396 goto done;
397
398 /* now write into that space */
399 dst = STRINGLIB_STR(result);
400
401 /* figure out how much leading space we need, based on the
402 aligning */
403 if (format->align == '>')
404 lpad = width - len;
405 else if (format->align == '^')
406 lpad = (width - len) / 2;
407 else
408 lpad = 0;
409
410 /* if right aligning, increment the destination allow space on the
411 left */
412 memcpy(dst + lpad, src, len * sizeof(STRINGLIB_CHAR));
413
414 /* do any padding */
415 if (width > len) {
416 STRINGLIB_CHAR fill_char = format->fill_char;
417 if (fill_char == '\0') {
418 /* use the default, if not specified */
419 fill_char = ' ';
420 }
421
422 /* pad on left */
423 if (lpad)
424 STRINGLIB_FILL(dst, fill_char, lpad);
425
426 /* pad on right */
427 if (width - len - lpad)
428 STRINGLIB_FILL(dst + len + lpad, fill_char, width - len - lpad);
429 }
430
431done:
432 return result;
433}
434
435
436/************************************************************************/
437/*********** long formatting ********************************************/
438/************************************************************************/
439
Eric Smith8fd3eba2008-02-17 19:48:00 +0000440#if defined FORMAT_LONG || defined FORMAT_INT
441typedef PyObject*
442(*IntOrLongToString)(PyObject *value, int base);
443
Eric Smith8c663262007-08-25 02:26:07 +0000444static PyObject *
Eric Smith8fd3eba2008-02-17 19:48:00 +0000445format_int_or_long_internal(PyObject *value, const InternalFormatSpec *format,
446 IntOrLongToString tostring)
Eric Smith8c663262007-08-25 02:26:07 +0000447{
448 PyObject *result = NULL;
Eric Smith8fd3eba2008-02-17 19:48:00 +0000449 PyObject *tmp = NULL;
450 STRINGLIB_CHAR *pnumeric_chars;
451 STRINGLIB_CHAR numeric_char;
Eric Smith8c663262007-08-25 02:26:07 +0000452 STRINGLIB_CHAR sign = '\0';
453 STRINGLIB_CHAR *p;
454 Py_ssize_t n_digits; /* count of digits need from the computed
455 string */
Eric Smith8fd3eba2008-02-17 19:48:00 +0000456 Py_ssize_t n_leading_chars;
Eric Smith5807c412008-05-11 21:00:57 +0000457 Py_ssize_t n_grouping_chars = 0; /* Count of additional chars to
458 allocate, used for 'n'
459 formatting. */
Eric Smith8c663262007-08-25 02:26:07 +0000460 NumberFieldWidths spec;
461 long x;
462
463 /* no precision allowed on integers */
464 if (format->precision != -1) {
465 PyErr_SetString(PyExc_ValueError,
466 "Precision not allowed in integer format specifier");
467 goto done;
468 }
469
470
471 /* special case for character formatting */
472 if (format->type == 'c') {
473 /* error to specify a sign */
474 if (format->sign != '\0') {
475 PyErr_SetString(PyExc_ValueError,
476 "Sign not allowed with integer"
477 " format specifier 'c'");
478 goto done;
479 }
480
481 /* taken from unicodeobject.c formatchar() */
482 /* Integer input truncated to a character */
Eric Smith8fd3eba2008-02-17 19:48:00 +0000483/* XXX: won't work for int */
Christian Heimes217cfd12007-12-02 14:31:20 +0000484 x = PyLong_AsLong(value);
Eric Smith8c663262007-08-25 02:26:07 +0000485 if (x == -1 && PyErr_Occurred())
486 goto done;
487#ifdef Py_UNICODE_WIDE
488 if (x < 0 || x > 0x10ffff) {
489 PyErr_SetString(PyExc_OverflowError,
490 "%c arg not in range(0x110000) "
491 "(wide Python build)");
492 goto done;
493 }
494#else
495 if (x < 0 || x > 0xffff) {
496 PyErr_SetString(PyExc_OverflowError,
497 "%c arg not in range(0x10000) "
498 "(narrow Python build)");
499 goto done;
500 }
501#endif
Eric Smith8fd3eba2008-02-17 19:48:00 +0000502 numeric_char = (STRINGLIB_CHAR)x;
503 pnumeric_chars = &numeric_char;
504 n_digits = 1;
Eric Smith0cb431c2007-08-28 01:07:27 +0000505 }
506 else {
Eric Smith8c663262007-08-25 02:26:07 +0000507 int base;
Eric Smith8fd3eba2008-02-17 19:48:00 +0000508 int leading_chars_to_skip; /* Number of characters added by
509 PyNumber_ToBase that we want to
510 skip over. */
511
512 /* Compute the base and how many characters will be added by
Eric Smith8c663262007-08-25 02:26:07 +0000513 PyNumber_ToBase */
514 switch (format->type) {
515 case 'b':
516 base = 2;
Eric Smith8fd3eba2008-02-17 19:48:00 +0000517 leading_chars_to_skip = 2; /* 0b */
Eric Smith8c663262007-08-25 02:26:07 +0000518 break;
519 case 'o':
520 base = 8;
Eric Smith8fd3eba2008-02-17 19:48:00 +0000521 leading_chars_to_skip = 2; /* 0o */
Eric Smith8c663262007-08-25 02:26:07 +0000522 break;
523 case 'x':
524 case 'X':
525 base = 16;
Eric Smith8fd3eba2008-02-17 19:48:00 +0000526 leading_chars_to_skip = 2; /* 0x */
Eric Smith8c663262007-08-25 02:26:07 +0000527 break;
528 default: /* shouldn't be needed, but stops a compiler warning */
529 case 'd':
Eric Smith5807c412008-05-11 21:00:57 +0000530 case 'n':
Eric Smith8c663262007-08-25 02:26:07 +0000531 base = 10;
Eric Smith8fd3eba2008-02-17 19:48:00 +0000532 leading_chars_to_skip = 0;
Eric Smith8c663262007-08-25 02:26:07 +0000533 break;
534 }
535
Eric Smith8fd3eba2008-02-17 19:48:00 +0000536 /* Do the hard part, converting to a string in a given base */
537 tmp = tostring(value, base);
538 if (tmp == NULL)
Eric Smith8c663262007-08-25 02:26:07 +0000539 goto done;
540
Eric Smith8fd3eba2008-02-17 19:48:00 +0000541 pnumeric_chars = STRINGLIB_STR(tmp);
542 n_digits = STRINGLIB_LEN(tmp);
Eric Smith8c663262007-08-25 02:26:07 +0000543
Eric Smith8fd3eba2008-02-17 19:48:00 +0000544 /* Remember not to modify what pnumeric_chars points to. it
545 might be interned. Only modify it after we copy it into a
546 newly allocated output buffer. */
Eric Smith8c663262007-08-25 02:26:07 +0000547
Eric Smith8fd3eba2008-02-17 19:48:00 +0000548 /* Is a sign character present in the output? If so, remember it
Eric Smith8c663262007-08-25 02:26:07 +0000549 and skip it */
Eric Smith8fd3eba2008-02-17 19:48:00 +0000550 sign = pnumeric_chars[0];
Eric Smith8c663262007-08-25 02:26:07 +0000551 if (sign == '-') {
Eric Smith8fd3eba2008-02-17 19:48:00 +0000552 ++leading_chars_to_skip;
Eric Smith8c663262007-08-25 02:26:07 +0000553 }
554
Eric Smith8fd3eba2008-02-17 19:48:00 +0000555 /* Skip over the leading chars (0x, 0b, etc.) */
556 n_digits -= leading_chars_to_skip;
557 pnumeric_chars += leading_chars_to_skip;
Eric Smith8c663262007-08-25 02:26:07 +0000558 }
559
Eric Smith8fd3eba2008-02-17 19:48:00 +0000560 /* Calculate the widths of the various leading and trailing parts */
Eric Smith8c663262007-08-25 02:26:07 +0000561 calc_number_widths(&spec, sign, n_digits, format);
562
Eric Smith5807c412008-05-11 21:00:57 +0000563 if (format->type == 'n')
564 /* Compute how many additional chars we need to allocate
565 to hold the thousands grouping. */
566 STRINGLIB_GROUPING(pnumeric_chars, n_digits,
567 pnumeric_chars+n_digits,
568 0, &n_grouping_chars, 0);
569
Eric Smith8fd3eba2008-02-17 19:48:00 +0000570 /* Allocate a new string to hold the result */
Eric Smith5807c412008-05-11 21:00:57 +0000571 result = STRINGLIB_NEW(NULL, spec.n_total + n_grouping_chars);
Eric Smith8fd3eba2008-02-17 19:48:00 +0000572 if (!result)
573 goto done;
574 p = STRINGLIB_STR(result);
Eric Smith8c663262007-08-25 02:26:07 +0000575
Eric Smith8fd3eba2008-02-17 19:48:00 +0000576 /* Fill in the digit parts */
577 n_leading_chars = spec.n_lpadding + spec.n_lsign + spec.n_spadding;
578 memmove(p + n_leading_chars,
579 pnumeric_chars,
580 n_digits * sizeof(STRINGLIB_CHAR));
581
Eric Smith5807c412008-05-11 21:00:57 +0000582 /* If type is 'X', convert to uppercase */
Eric Smith8fd3eba2008-02-17 19:48:00 +0000583 if (format->type == 'X') {
584 Py_ssize_t t;
Christian Heimesc3f30c42008-02-22 16:37:40 +0000585 for (t = 0; t < n_digits; ++t)
Eric Smith8fd3eba2008-02-17 19:48:00 +0000586 p[t + n_leading_chars] = STRINGLIB_TOUPPER(p[t + n_leading_chars]);
Eric Smith8c663262007-08-25 02:26:07 +0000587 }
588
Eric Smith5807c412008-05-11 21:00:57 +0000589 /* Insert the grouping, if any, after the uppercasing of 'X', so we can
590 ensure that grouping chars won't be affeted. */
591 if (n_grouping_chars && format->type == 'n') {
592 /* We know this can't fail, since we've already
593 reserved enough space. */
594 STRINGLIB_CHAR *pstart = p + n_leading_chars;
595 int r = STRINGLIB_GROUPING(pstart, n_digits,
596 pstart + n_digits,
597 spec.n_total+n_grouping_chars-n_leading_chars,
598 NULL, 0);
599 assert(r);
600 }
601
Eric Smith8fd3eba2008-02-17 19:48:00 +0000602 /* Fill in the non-digit parts */
Eric Smith8c663262007-08-25 02:26:07 +0000603 fill_number(p, &spec, n_digits,
604 format->fill_char == '\0' ? ' ' : format->fill_char);
605
Eric Smith8c663262007-08-25 02:26:07 +0000606done:
Eric Smith8fd3eba2008-02-17 19:48:00 +0000607 Py_XDECREF(tmp);
Eric Smith8c663262007-08-25 02:26:07 +0000608 return result;
609}
Eric Smith8fd3eba2008-02-17 19:48:00 +0000610#endif /* defined FORMAT_LONG || defined FORMAT_INT */
Eric Smith8c663262007-08-25 02:26:07 +0000611
612/************************************************************************/
613/*********** float formatting *******************************************/
614/************************************************************************/
615
Eric Smith8fd3eba2008-02-17 19:48:00 +0000616#ifdef FORMAT_FLOAT
617#if STRINGLIB_IS_UNICODE
Eric Smith8c663262007-08-25 02:26:07 +0000618/* taken from unicodeobject.c */
619static Py_ssize_t
620strtounicode(Py_UNICODE *buffer, const char *charbuffer)
621{
622 register Py_ssize_t i;
623 Py_ssize_t len = strlen(charbuffer);
Christian Heimesc3f30c42008-02-22 16:37:40 +0000624 for (i = len - 1; i >= 0; --i)
Eric Smith185e30c2007-08-30 22:23:08 +0000625 buffer[i] = (Py_UNICODE) charbuffer[i];
Eric Smith8c663262007-08-25 02:26:07 +0000626
627 return len;
628}
Eric Smith8fd3eba2008-02-17 19:48:00 +0000629#endif
Eric Smith8c663262007-08-25 02:26:07 +0000630
Eric Smith8c663262007-08-25 02:26:07 +0000631/* see FORMATBUFLEN in unicodeobject.c */
632#define FLOAT_FORMATBUFLEN 120
633
634/* much of this is taken from unicodeobject.c */
Eric Smith8c663262007-08-25 02:26:07 +0000635static PyObject *
Christian Heimesc3f30c42008-02-22 16:37:40 +0000636format_float_internal(PyObject *value,
637 const InternalFormatSpec *format)
Eric Smith8c663262007-08-25 02:26:07 +0000638{
639 /* fmt = '%.' + `prec` + `type` + '%%'
640 worst case length = 2 + 10 (len of INT_MAX) + 1 + 2 = 15 (use 20)*/
641 char fmt[20];
642
643 /* taken from unicodeobject.c */
644 /* Worst case length calc to ensure no buffer overrun:
645
646 'g' formats:
Eric Smith185e30c2007-08-30 22:23:08 +0000647 fmt = %#.<prec>g
648 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
649 for any double rep.)
650 len = 1 + prec + 1 + 2 + 5 = 9 + prec
Eric Smith8c663262007-08-25 02:26:07 +0000651
652 'f' formats:
Eric Smith185e30c2007-08-30 22:23:08 +0000653 buf = '-' + [0-9]*x + '.' + [0-9]*prec (with x < 50)
654 len = 1 + 50 + 1 + prec = 52 + prec
Eric Smith8c663262007-08-25 02:26:07 +0000655
656 If prec=0 the effective precision is 1 (the leading digit is
657 always given), therefore increase the length by one.
658
659 */
660 char charbuf[FLOAT_FORMATBUFLEN];
661 Py_ssize_t n_digits;
662 double x;
663 Py_ssize_t precision = format->precision;
664 PyObject *result = NULL;
665 STRINGLIB_CHAR sign;
666 char* trailing = "";
667 STRINGLIB_CHAR *p;
668 NumberFieldWidths spec;
Christian Heimesc3f30c42008-02-22 16:37:40 +0000669 STRINGLIB_CHAR type = format->type;
Eric Smith8c663262007-08-25 02:26:07 +0000670
671#if STRINGLIB_IS_UNICODE
672 Py_UNICODE unicodebuf[FLOAT_FORMATBUFLEN];
673#endif
674
675 /* first, do the conversion as 8-bit chars, using the platform's
676 snprintf. then, if needed, convert to unicode. */
677
678 /* 'F' is the same as 'f', per the PEP */
679 if (type == 'F')
680 type = 'f';
681
682 x = PyFloat_AsDouble(value);
683
684 if (x == -1.0 && PyErr_Occurred())
Eric Smith185e30c2007-08-30 22:23:08 +0000685 goto done;
Eric Smith8c663262007-08-25 02:26:07 +0000686
687 if (type == '%') {
688 type = 'f';
689 x *= 100;
690 trailing = "%";
691 }
692
693 if (precision < 0)
Eric Smith185e30c2007-08-30 22:23:08 +0000694 precision = 6;
Eric Smith8c663262007-08-25 02:26:07 +0000695 if (type == 'f' && (fabs(x) / 1e25) >= 1e25)
Eric Smith185e30c2007-08-30 22:23:08 +0000696 type = 'g';
Eric Smith8c663262007-08-25 02:26:07 +0000697
698 /* cast "type", because if we're in unicode we need to pass a
699 8-bit char. this is safe, because we've restricted what "type"
700 can be */
Eric Smith8fd3eba2008-02-17 19:48:00 +0000701 PyOS_snprintf(fmt, sizeof(fmt), "%%.%" PY_FORMAT_SIZE_T "d%c", precision,
702 (char)type);
Eric Smith8c663262007-08-25 02:26:07 +0000703
Christian Heimesc3f30c42008-02-22 16:37:40 +0000704 /* do the actual formatting */
705 PyOS_ascii_formatd(charbuf, sizeof(charbuf), fmt, x);
Eric Smith8c663262007-08-25 02:26:07 +0000706
707 /* adding trailing to fmt with PyOS_snprintf doesn't work, not
708 sure why. we'll just concatentate it here, no harm done. we
709 know we can't have a buffer overflow from the fmt size
710 analysis */
711 strcat(charbuf, trailing);
712
713 /* rather than duplicate the code for snprintf for both unicode
714 and 8 bit strings, we just use the 8 bit version and then
715 convert to unicode in a separate code path. that's probably
716 the lesser of 2 evils. */
717#if STRINGLIB_IS_UNICODE
718 n_digits = strtounicode(unicodebuf, charbuf);
719 p = unicodebuf;
720#else
721 /* compute the length. I believe this is done because the return
722 value from snprintf above is unreliable */
723 n_digits = strlen(charbuf);
724 p = charbuf;
725#endif
726
727 /* is a sign character present in the output? if so, remember it
728 and skip it */
729 sign = p[0];
730 if (sign == '-') {
Christian Heimesc3f30c42008-02-22 16:37:40 +0000731 ++p;
732 --n_digits;
Eric Smith8c663262007-08-25 02:26:07 +0000733 }
734
735 calc_number_widths(&spec, sign, n_digits, format);
736
737 /* allocate a string with enough space */
738 result = STRINGLIB_NEW(NULL, spec.n_total);
739 if (result == NULL)
740 goto done;
741
742 /* fill in the non-digit parts */
743 fill_number(STRINGLIB_STR(result), &spec, n_digits,
744 format->fill_char == '\0' ? ' ' : format->fill_char);
745
746 /* fill in the digit parts */
Eric Smith8fd3eba2008-02-17 19:48:00 +0000747 memmove(STRINGLIB_STR(result) +
748 (spec.n_lpadding + spec.n_lsign + spec.n_spadding),
Eric Smith8c663262007-08-25 02:26:07 +0000749 p,
750 n_digits * sizeof(STRINGLIB_CHAR));
751
752done:
753 return result;
754}
Eric Smith8fd3eba2008-02-17 19:48:00 +0000755#endif /* FORMAT_FLOAT */
Eric Smith8c663262007-08-25 02:26:07 +0000756
757/************************************************************************/
758/*********** built in formatters ****************************************/
759/************************************************************************/
Eric Smith8c663262007-08-25 02:26:07 +0000760PyObject *
Eric Smith4a7d76d2008-05-30 18:10:19 +0000761FORMAT_STRING(PyObject *obj,
762 STRINGLIB_CHAR *format_spec,
763 Py_ssize_t format_spec_len)
Eric Smith8c663262007-08-25 02:26:07 +0000764{
Eric Smith8c663262007-08-25 02:26:07 +0000765 InternalFormatSpec format;
Eric Smith4a7d76d2008-05-30 18:10:19 +0000766 PyObject *result = NULL;
Eric Smith8c663262007-08-25 02:26:07 +0000767
768 /* check for the special case of zero length format spec, make
Eric Smith4a7d76d2008-05-30 18:10:19 +0000769 it equivalent to str(obj) */
770 if (format_spec_len == 0) {
771 result = STRINGLIB_TOSTR(obj);
Eric Smith8c663262007-08-25 02:26:07 +0000772 goto done;
773 }
774
775 /* parse the format_spec */
Eric Smith4a7d76d2008-05-30 18:10:19 +0000776 if (!parse_internal_render_format_spec(format_spec, format_spec_len,
777 &format, 's'))
Eric Smith8c663262007-08-25 02:26:07 +0000778 goto done;
779
780 /* type conversion? */
781 switch (format.type) {
782 case 's':
783 /* no type conversion needed, already a string. do the formatting */
Eric Smith4a7d76d2008-05-30 18:10:19 +0000784 result = format_string_internal(obj, &format);
Eric Smith8c663262007-08-25 02:26:07 +0000785 break;
Eric Smith8c663262007-08-25 02:26:07 +0000786 default:
787 /* unknown */
Martin v. Löwis5a6f4582008-04-07 03:22:07 +0000788 #if STRINGLIB_IS_UNICODE
789 /* If STRINGLIB_CHAR is Py_UNICODE, %c might be out-of-range,
790 hence the two cases. If it is char, gcc complains that the
791 condition below is always true, hence the ifdef. */
792 if (format.type > 32 && format.type <128)
793 #endif
794 PyErr_Format(PyExc_ValueError, "Unknown conversion type %c",
795 (char)format.type);
796 #if STRINGLIB_IS_UNICODE
797 else
798 PyErr_Format(PyExc_ValueError, "Unknown conversion type '\\x%x'",
799 (unsigned int)format.type);
800 #endif
Eric Smith8c663262007-08-25 02:26:07 +0000801 goto done;
802 }
803
804done:
Eric Smith8c663262007-08-25 02:26:07 +0000805 return result;
806}
807
Eric Smith8fd3eba2008-02-17 19:48:00 +0000808#if defined FORMAT_LONG || defined FORMAT_INT
809static PyObject*
Eric Smith4a7d76d2008-05-30 18:10:19 +0000810format_int_or_long(PyObject* obj,
811 STRINGLIB_CHAR *format_spec,
812 Py_ssize_t format_spec_len,
813 IntOrLongToString tostring)
Eric Smith8c663262007-08-25 02:26:07 +0000814{
Eric Smith8c663262007-08-25 02:26:07 +0000815 PyObject *result = NULL;
816 PyObject *tmp = NULL;
817 InternalFormatSpec format;
818
Eric Smith8c663262007-08-25 02:26:07 +0000819 /* check for the special case of zero length format spec, make
Eric Smith4a7d76d2008-05-30 18:10:19 +0000820 it equivalent to str(obj) */
821 if (format_spec_len == 0) {
822 result = STRINGLIB_TOSTR(obj);
Eric Smith8c663262007-08-25 02:26:07 +0000823 goto done;
824 }
825
826 /* parse the format_spec */
Eric Smith4a7d76d2008-05-30 18:10:19 +0000827 if (!parse_internal_render_format_spec(format_spec,
828 format_spec_len,
829 &format, 'd'))
Eric Smith8c663262007-08-25 02:26:07 +0000830 goto done;
831
832 /* type conversion? */
833 switch (format.type) {
Eric Smith8c663262007-08-25 02:26:07 +0000834 case 'b':
835 case 'c':
836 case 'd':
837 case 'o':
838 case 'x':
839 case 'X':
Eric Smith5807c412008-05-11 21:00:57 +0000840 case 'n':
Eric Smith8fd3eba2008-02-17 19:48:00 +0000841 /* no type conversion needed, already an int (or long). do
842 the formatting */
Eric Smith4a7d76d2008-05-30 18:10:19 +0000843 result = format_int_or_long_internal(obj, &format, tostring);
Eric Smith8c663262007-08-25 02:26:07 +0000844 break;
845
Eric Smithfa767ef2008-01-28 10:59:27 +0000846 case 'e':
847 case 'E':
848 case 'f':
849 case 'F':
850 case 'g':
851 case 'G':
Eric Smithfa767ef2008-01-28 10:59:27 +0000852 case '%':
853 /* convert to float */
Eric Smith4a7d76d2008-05-30 18:10:19 +0000854 tmp = PyNumber_Float(obj);
Eric Smithfa767ef2008-01-28 10:59:27 +0000855 if (tmp == NULL)
856 goto done;
Eric Smith4a7d76d2008-05-30 18:10:19 +0000857 result = format_float_internal(obj, &format);
Eric Smithfa767ef2008-01-28 10:59:27 +0000858 break;
859
Eric Smith8c663262007-08-25 02:26:07 +0000860 default:
861 /* unknown */
862 PyErr_Format(PyExc_ValueError, "Unknown conversion type %c",
863 format.type);
864 goto done;
865 }
866
867done:
868 Py_XDECREF(tmp);
869 return result;
870}
Eric Smith8fd3eba2008-02-17 19:48:00 +0000871#endif /* FORMAT_LONG || defined FORMAT_INT */
Eric Smith8c663262007-08-25 02:26:07 +0000872
Eric Smith8fd3eba2008-02-17 19:48:00 +0000873#ifdef FORMAT_LONG
874/* Need to define long_format as a function that will convert a long
875 to a string. In 3.0, _PyLong_Format has the correct signature. In
876 2.x, we need to fudge a few parameters */
877#if PY_VERSION_HEX >= 0x03000000
878#define long_format _PyLong_Format
879#else
880static PyObject*
881long_format(PyObject* value, int base)
882{
883 /* Convert to base, don't add trailing 'L', and use the new octal
884 format. We already know this is a long object */
885 assert(PyLong_Check(value));
886 /* convert to base, don't add 'L', and use the new octal format */
887 return _PyLong_Format(value, base, 0, 1);
888}
889#endif
890
891PyObject *
Eric Smith4a7d76d2008-05-30 18:10:19 +0000892FORMAT_LONG(PyObject *obj,
893 STRINGLIB_CHAR *format_spec,
894 Py_ssize_t format_spec_len)
Eric Smith8fd3eba2008-02-17 19:48:00 +0000895{
Eric Smith4a7d76d2008-05-30 18:10:19 +0000896 return format_int_or_long(obj, format_spec, format_spec_len,
897 long_format);
Eric Smith8fd3eba2008-02-17 19:48:00 +0000898}
899#endif /* FORMAT_LONG */
900
901#ifdef FORMAT_INT
902/* this is only used for 2.x, not 3.0 */
903static PyObject*
904int_format(PyObject* value, int base)
905{
906 /* Convert to base, and use the new octal format. We already
907 know this is an int object */
908 assert(PyInt_Check(value));
909 return _PyInt_Format((PyIntObject*)value, base, 1);
910}
911
912PyObject *
Eric Smith4a7d76d2008-05-30 18:10:19 +0000913FORMAT_INT(PyObject *obj,
914 STRINGLIB_CHAR *format_spec,
915 Py_ssize_t format_spec_len)
Eric Smith8fd3eba2008-02-17 19:48:00 +0000916{
Eric Smith4a7d76d2008-05-30 18:10:19 +0000917 return format_int_or_long(obj, format_spec, format_spec_len,
918 int_format);
Eric Smith8fd3eba2008-02-17 19:48:00 +0000919}
920#endif /* FORMAT_INT */
921
922#ifdef FORMAT_FLOAT
Eric Smith8c663262007-08-25 02:26:07 +0000923PyObject *
Eric Smith4a7d76d2008-05-30 18:10:19 +0000924FORMAT_FLOAT(PyObject *obj,
925 STRINGLIB_CHAR *format_spec,
926 Py_ssize_t format_spec_len)
Eric Smith8c663262007-08-25 02:26:07 +0000927{
Eric Smith8c663262007-08-25 02:26:07 +0000928 PyObject *result = NULL;
Eric Smith8c663262007-08-25 02:26:07 +0000929 InternalFormatSpec format;
930
Eric Smith8c663262007-08-25 02:26:07 +0000931 /* check for the special case of zero length format spec, make
Eric Smith4a7d76d2008-05-30 18:10:19 +0000932 it equivalent to str(obj) */
933 if (format_spec_len == 0) {
934 result = STRINGLIB_TOSTR(obj);
Eric Smith8c663262007-08-25 02:26:07 +0000935 goto done;
936 }
937
938 /* parse the format_spec */
Eric Smith4a7d76d2008-05-30 18:10:19 +0000939 if (!parse_internal_render_format_spec(format_spec,
940 format_spec_len,
941 &format, '\0'))
Eric Smith8c663262007-08-25 02:26:07 +0000942 goto done;
943
944 /* type conversion? */
945 switch (format.type) {
Christian Heimesb186d002008-03-18 15:15:01 +0000946 case '\0':
947 /* 'Z' means like 'g', but with at least one decimal. See
948 PyOS_ascii_formatd */
949 format.type = 'Z';
950 /* Deliberate fall through to the next case statement */
Eric Smith8c663262007-08-25 02:26:07 +0000951 case 'e':
952 case 'E':
953 case 'f':
954 case 'F':
955 case 'g':
956 case 'G':
957 case 'n':
958 case '%':
959 /* no conversion, already a float. do the formatting */
Eric Smith4a7d76d2008-05-30 18:10:19 +0000960 result = format_float_internal(obj, &format);
Eric Smith8c663262007-08-25 02:26:07 +0000961 break;
962
963 default:
964 /* unknown */
965 PyErr_Format(PyExc_ValueError, "Unknown conversion type %c",
966 format.type);
967 goto done;
968 }
969
970done:
Eric Smith8c663262007-08-25 02:26:07 +0000971 return result;
972}
Eric Smith8fd3eba2008-02-17 19:48:00 +0000973#endif /* FORMAT_FLOAT */