blob: 018121a4505c12b1a4a7579f83680f25d6648049 [file] [log] [blame]
Eric Smith8c663262007-08-25 02:26:07 +00001/* implements the string, long, and float formatters. that is,
2 string.__format__, etc. */
3
4/* Before including this, you must include either:
5 stringlib/unicodedefs.h
6 stringlib/stringdefs.h
7
8 Also, you should define the names:
9 FORMAT_STRING
10 FORMAT_LONG
11 FORMAT_FLOAT
12 to be whatever you want the public names of these functions to
13 be. These are the only non-static functions defined here.
14*/
15
Eric Smithb7f5ba12007-08-29 12:38:45 +000016#define ALLOW_PARENS_FOR_SIGN 0
17
Eric Smith8c663262007-08-25 02:26:07 +000018/*
19 get_integer consumes 0 or more decimal digit characters from an
20 input string, updates *result with the corresponding positive
21 integer, and returns the number of digits consumed.
22
23 returns -1 on error.
24*/
25static int
26get_integer(STRINGLIB_CHAR **ptr, STRINGLIB_CHAR *end,
27 Py_ssize_t *result)
28{
29 Py_ssize_t accumulator, digitval, oldaccumulator;
30 int numdigits;
31 accumulator = numdigits = 0;
32 for (;;(*ptr)++, numdigits++) {
33 if (*ptr >= end)
34 break;
35 digitval = STRINGLIB_TODECIMAL(**ptr);
36 if (digitval < 0)
37 break;
38 /*
39 This trick was copied from old Unicode format code. It's cute,
40 but would really suck on an old machine with a slow divide
41 implementation. Fortunately, in the normal case we do not
42 expect too many digits.
43 */
44 oldaccumulator = accumulator;
45 accumulator *= 10;
46 if ((accumulator+10)/10 != oldaccumulator+1) {
47 PyErr_Format(PyExc_ValueError,
48 "Too many decimal digits in format string");
49 return -1;
50 }
51 accumulator += digitval;
52 }
53 *result = accumulator;
54 return numdigits;
55}
56
57/************************************************************************/
58/*********** standard format specifier parsing **************************/
59/************************************************************************/
60
61/* returns true if this character is a specifier alignment token */
62Py_LOCAL_INLINE(int)
63is_alignment_token(STRINGLIB_CHAR c)
64{
65 switch (c) {
66 case '<': case '>': case '=': case '^':
67 return 1;
68 default:
69 return 0;
70 }
71}
72
73/* returns true if this character is a sign element */
74Py_LOCAL_INLINE(int)
75is_sign_element(STRINGLIB_CHAR c)
76{
77 switch (c) {
Eric Smithb7f5ba12007-08-29 12:38:45 +000078 case ' ': case '+': case '-':
Eric Smith44300952007-08-29 12:43:12 +000079#if ALLOW_PARENS_FOR_SIGN
Eric Smithb7f5ba12007-08-29 12:38:45 +000080 case '(':
Eric Smith44300952007-08-29 12:43:12 +000081#endif
Eric Smith8c663262007-08-25 02:26:07 +000082 return 1;
83 default:
84 return 0;
85 }
86}
87
88
89typedef struct {
90 STRINGLIB_CHAR fill_char;
91 STRINGLIB_CHAR align;
92 STRINGLIB_CHAR sign;
93 Py_ssize_t width;
94 Py_ssize_t precision;
95 STRINGLIB_CHAR type;
96} InternalFormatSpec;
97
98/*
99 ptr points to the start of the format_spec, end points just past its end.
100 fills in format with the parsed information.
101 returns 1 on success, 0 on failure.
102 if failure, sets the exception
103*/
104static int
Eric Smith4a7d76d2008-05-30 18:10:19 +0000105parse_internal_render_format_spec(STRINGLIB_CHAR *format_spec,
106 Py_ssize_t format_spec_len,
Eric Smith8c663262007-08-25 02:26:07 +0000107 InternalFormatSpec *format,
108 char default_type)
109{
Eric Smith4a7d76d2008-05-30 18:10:19 +0000110 STRINGLIB_CHAR *ptr = format_spec;
111 STRINGLIB_CHAR *end = format_spec + format_spec_len;
Eric Smith8c663262007-08-25 02:26:07 +0000112
113 /* end-ptr is used throughout this code to specify the length of
114 the input string */
115
116 Py_ssize_t specified_width;
117
118 format->fill_char = '\0';
119 format->align = '\0';
120 format->sign = '\0';
121 format->width = -1;
122 format->precision = -1;
123 format->type = default_type;
124
125 /* If the second char is an alignment token,
126 then parse the fill char */
127 if (end-ptr >= 2 && is_alignment_token(ptr[1])) {
128 format->align = ptr[1];
129 format->fill_char = ptr[0];
130 ptr += 2;
Eric Smith0cb431c2007-08-28 01:07:27 +0000131 }
132 else if (end-ptr >= 1 && is_alignment_token(ptr[0])) {
Eric Smith8c663262007-08-25 02:26:07 +0000133 format->align = ptr[0];
Christian Heimesc3f30c42008-02-22 16:37:40 +0000134 ++ptr;
Eric Smith8c663262007-08-25 02:26:07 +0000135 }
136
137 /* Parse the various sign options */
138 if (end-ptr >= 1 && is_sign_element(ptr[0])) {
139 format->sign = ptr[0];
Christian Heimesc3f30c42008-02-22 16:37:40 +0000140 ++ptr;
Eric Smithb7f5ba12007-08-29 12:38:45 +0000141#if ALLOW_PARENS_FOR_SIGN
Eric Smith8c663262007-08-25 02:26:07 +0000142 if (end-ptr >= 1 && ptr[0] == ')') {
Christian Heimesc3f30c42008-02-22 16:37:40 +0000143 ++ptr;
Eric Smith8c663262007-08-25 02:26:07 +0000144 }
Eric Smithb7f5ba12007-08-29 12:38:45 +0000145#endif
Eric Smith8c663262007-08-25 02:26:07 +0000146 }
147
148 /* The special case for 0-padding (backwards compat) */
Eric Smith185e30c2007-08-30 22:23:08 +0000149 if (format->fill_char == '\0' && end-ptr >= 1 && ptr[0] == '0') {
Eric Smith8c663262007-08-25 02:26:07 +0000150 format->fill_char = '0';
151 if (format->align == '\0') {
152 format->align = '=';
153 }
Christian Heimesc3f30c42008-02-22 16:37:40 +0000154 ++ptr;
Eric Smith8c663262007-08-25 02:26:07 +0000155 }
156
157 /* XXX add error checking */
158 specified_width = get_integer(&ptr, end, &format->width);
159
160 /* if specified_width is 0, we didn't consume any characters for
161 the width. in that case, reset the width to -1, because
162 get_integer() will have set it to zero */
163 if (specified_width == 0) {
164 format->width = -1;
165 }
166
167 /* Parse field precision */
168 if (end-ptr && ptr[0] == '.') {
Christian Heimesc3f30c42008-02-22 16:37:40 +0000169 ++ptr;
Eric Smith8c663262007-08-25 02:26:07 +0000170
171 /* XXX add error checking */
172 specified_width = get_integer(&ptr, end, &format->precision);
173
174 /* not having a precision after a dot is an error */
175 if (specified_width == 0) {
176 PyErr_Format(PyExc_ValueError,
177 "Format specifier missing precision");
178 return 0;
179 }
180
181 }
182
183 /* Finally, parse the type field */
184
185 if (end-ptr > 1) {
186 /* invalid conversion spec */
187 PyErr_Format(PyExc_ValueError, "Invalid conversion specification");
188 return 0;
189 }
190
191 if (end-ptr == 1) {
192 format->type = ptr[0];
Christian Heimesc3f30c42008-02-22 16:37:40 +0000193 ++ptr;
Eric Smith8c663262007-08-25 02:26:07 +0000194 }
195
196 return 1;
197}
198
Eric Smith8fd3eba2008-02-17 19:48:00 +0000199#if defined FORMAT_FLOAT || defined FORMAT_LONG
Eric Smith8c663262007-08-25 02:26:07 +0000200/************************************************************************/
201/*********** common routines for numeric formatting *********************/
202/************************************************************************/
203
204/* describes the layout for an integer, see the comment in
205 _calc_integer_widths() for details */
206typedef struct {
207 Py_ssize_t n_lpadding;
208 Py_ssize_t n_spadding;
209 Py_ssize_t n_rpadding;
210 char lsign;
211 Py_ssize_t n_lsign;
212 char rsign;
213 Py_ssize_t n_rsign;
214 Py_ssize_t n_total; /* just a convenience, it's derivable from the
215 other fields */
216} NumberFieldWidths;
217
218/* not all fields of format are used. for example, precision is
219 unused. should this take discrete params in order to be more clear
220 about what it does? or is passing a single format parameter easier
221 and more efficient enough to justify a little obfuscation? */
222static void
223calc_number_widths(NumberFieldWidths *r, STRINGLIB_CHAR actual_sign,
224 Py_ssize_t n_digits, const InternalFormatSpec *format)
225{
226 r->n_lpadding = 0;
227 r->n_spadding = 0;
228 r->n_rpadding = 0;
229 r->lsign = '\0';
230 r->n_lsign = 0;
231 r->rsign = '\0';
232 r->n_rsign = 0;
233
234 /* the output will look like:
235 | |
236 | <lpadding> <lsign> <spadding> <digits> <rsign> <rpadding> |
237 | |
238
239 lsign and rsign are computed from format->sign and the actual
240 sign of the number
241
242 digits is already known
243
244 the total width is either given, or computed from the
245 actual digits
246
247 only one of lpadding, spadding, and rpadding can be non-zero,
248 and it's calculated from the width and other fields
249 */
250
251 /* compute the various parts we're going to write */
252 if (format->sign == '+') {
253 /* always put a + or - */
254 r->n_lsign = 1;
255 r->lsign = (actual_sign == '-' ? '-' : '+');
Eric Smith0cb431c2007-08-28 01:07:27 +0000256 }
Eric Smithb7f5ba12007-08-29 12:38:45 +0000257#if ALLOW_PARENS_FOR_SIGN
Eric Smith0cb431c2007-08-28 01:07:27 +0000258 else if (format->sign == '(') {
Eric Smith8c663262007-08-25 02:26:07 +0000259 if (actual_sign == '-') {
260 r->n_lsign = 1;
261 r->lsign = '(';
262 r->n_rsign = 1;
263 r->rsign = ')';
264 }
Eric Smith0cb431c2007-08-28 01:07:27 +0000265 }
Eric Smithb7f5ba12007-08-29 12:38:45 +0000266#endif
Eric Smith0cb431c2007-08-28 01:07:27 +0000267 else if (format->sign == ' ') {
Eric Smith8c663262007-08-25 02:26:07 +0000268 r->n_lsign = 1;
269 r->lsign = (actual_sign == '-' ? '-' : ' ');
Eric Smith0cb431c2007-08-28 01:07:27 +0000270 }
271 else {
Eric Smith8c663262007-08-25 02:26:07 +0000272 /* non specified, or the default (-) */
273 if (actual_sign == '-') {
274 r->n_lsign = 1;
275 r->lsign = '-';
276 }
277 }
278
279 /* now the number of padding characters */
280 if (format->width == -1) {
281 /* no padding at all, nothing to do */
Eric Smith0cb431c2007-08-28 01:07:27 +0000282 }
283 else {
Eric Smith8c663262007-08-25 02:26:07 +0000284 /* see if any padding is needed */
285 if (r->n_lsign + n_digits + r->n_rsign >= format->width) {
286 /* no padding needed, we're already bigger than the
287 requested width */
Eric Smith0cb431c2007-08-28 01:07:27 +0000288 }
289 else {
Eric Smith8c663262007-08-25 02:26:07 +0000290 /* determine which of left, space, or right padding is
291 needed */
Eric Smith8fd3eba2008-02-17 19:48:00 +0000292 Py_ssize_t padding = format->width -
293 (r->n_lsign + n_digits + r->n_rsign);
Eric Smith8c663262007-08-25 02:26:07 +0000294 if (format->align == '<')
295 r->n_rpadding = padding;
296 else if (format->align == '>')
297 r->n_lpadding = padding;
298 else if (format->align == '^') {
299 r->n_lpadding = padding / 2;
300 r->n_rpadding = padding - r->n_lpadding;
Eric Smith0cb431c2007-08-28 01:07:27 +0000301 }
Eric Smith185e30c2007-08-30 22:23:08 +0000302 else if (format->align == '=')
Eric Smith8c663262007-08-25 02:26:07 +0000303 r->n_spadding = padding;
Eric Smith185e30c2007-08-30 22:23:08 +0000304 else
305 r->n_lpadding = padding;
Eric Smith8c663262007-08-25 02:26:07 +0000306 }
307 }
308 r->n_total = r->n_lpadding + r->n_lsign + r->n_spadding +
309 n_digits + r->n_rsign + r->n_rpadding;
310}
311
312/* fill in the non-digit parts of a numbers's string representation,
313 as determined in _calc_integer_widths(). returns the pointer to
314 where the digits go. */
315static STRINGLIB_CHAR *
Eric Smithb151a452008-06-24 11:21:04 +0000316fill_non_digits(STRINGLIB_CHAR *p_buf, const NumberFieldWidths *spec,
317 Py_ssize_t n_digits, STRINGLIB_CHAR fill_char)
Eric Smith8c663262007-08-25 02:26:07 +0000318{
319 STRINGLIB_CHAR* p_digits;
320
321 if (spec->n_lpadding) {
322 STRINGLIB_FILL(p_buf, fill_char, spec->n_lpadding);
323 p_buf += spec->n_lpadding;
324 }
325 if (spec->n_lsign == 1) {
326 *p_buf++ = spec->lsign;
327 }
328 if (spec->n_spadding) {
329 STRINGLIB_FILL(p_buf, fill_char, spec->n_spadding);
330 p_buf += spec->n_spadding;
331 }
332 p_digits = p_buf;
333 p_buf += n_digits;
334 if (spec->n_rsign == 1) {
335 *p_buf++ = spec->rsign;
336 }
337 if (spec->n_rpadding) {
338 STRINGLIB_FILL(p_buf, fill_char, spec->n_rpadding);
339 p_buf += spec->n_rpadding;
340 }
341 return p_digits;
342}
Eric Smith8fd3eba2008-02-17 19:48:00 +0000343#endif /* FORMAT_FLOAT || FORMAT_LONG */
Eric Smith8c663262007-08-25 02:26:07 +0000344
345/************************************************************************/
346/*********** string formatting ******************************************/
347/************************************************************************/
348
349static PyObject *
350format_string_internal(PyObject *value, const InternalFormatSpec *format)
351{
352 Py_ssize_t width; /* total field width */
353 Py_ssize_t lpad;
354 STRINGLIB_CHAR *dst;
355 STRINGLIB_CHAR *src = STRINGLIB_STR(value);
356 Py_ssize_t len = STRINGLIB_LEN(value);
357 PyObject *result = NULL;
358
359 /* sign is not allowed on strings */
360 if (format->sign != '\0') {
361 PyErr_SetString(PyExc_ValueError,
362 "Sign not allowed in string format specifier");
363 goto done;
364 }
365
366 /* '=' alignment not allowed on strings */
367 if (format->align == '=') {
368 PyErr_SetString(PyExc_ValueError,
369 "'=' alignment not allowed "
370 "in string format specifier");
371 goto done;
372 }
373
374 /* if precision is specified, output no more that format.precision
375 characters */
376 if (format->precision >= 0 && len >= format->precision) {
377 len = format->precision;
378 }
379
380 if (format->width >= 0) {
381 width = format->width;
382
383 /* but use at least len characters */
384 if (len > width) {
385 width = len;
386 }
Eric Smith0cb431c2007-08-28 01:07:27 +0000387 }
388 else {
Eric Smith8c663262007-08-25 02:26:07 +0000389 /* not specified, use all of the chars and no more */
390 width = len;
391 }
392
393 /* allocate the resulting string */
394 result = STRINGLIB_NEW(NULL, width);
395 if (result == NULL)
396 goto done;
397
398 /* now write into that space */
399 dst = STRINGLIB_STR(result);
400
401 /* figure out how much leading space we need, based on the
402 aligning */
403 if (format->align == '>')
404 lpad = width - len;
405 else if (format->align == '^')
406 lpad = (width - len) / 2;
407 else
408 lpad = 0;
409
410 /* if right aligning, increment the destination allow space on the
411 left */
412 memcpy(dst + lpad, src, len * sizeof(STRINGLIB_CHAR));
413
414 /* do any padding */
415 if (width > len) {
416 STRINGLIB_CHAR fill_char = format->fill_char;
417 if (fill_char == '\0') {
418 /* use the default, if not specified */
419 fill_char = ' ';
420 }
421
422 /* pad on left */
423 if (lpad)
424 STRINGLIB_FILL(dst, fill_char, lpad);
425
426 /* pad on right */
427 if (width - len - lpad)
428 STRINGLIB_FILL(dst + len + lpad, fill_char, width - len - lpad);
429 }
430
431done:
432 return result;
433}
434
435
436/************************************************************************/
437/*********** long formatting ********************************************/
438/************************************************************************/
439
Eric Smith8fd3eba2008-02-17 19:48:00 +0000440#if defined FORMAT_LONG || defined FORMAT_INT
441typedef PyObject*
442(*IntOrLongToString)(PyObject *value, int base);
443
Eric Smith8c663262007-08-25 02:26:07 +0000444static PyObject *
Eric Smith8fd3eba2008-02-17 19:48:00 +0000445format_int_or_long_internal(PyObject *value, const InternalFormatSpec *format,
446 IntOrLongToString tostring)
Eric Smith8c663262007-08-25 02:26:07 +0000447{
448 PyObject *result = NULL;
Eric Smith8fd3eba2008-02-17 19:48:00 +0000449 PyObject *tmp = NULL;
450 STRINGLIB_CHAR *pnumeric_chars;
451 STRINGLIB_CHAR numeric_char;
Eric Smith8c663262007-08-25 02:26:07 +0000452 STRINGLIB_CHAR sign = '\0';
453 STRINGLIB_CHAR *p;
454 Py_ssize_t n_digits; /* count of digits need from the computed
455 string */
Eric Smith8fd3eba2008-02-17 19:48:00 +0000456 Py_ssize_t n_leading_chars;
Eric Smith5807c412008-05-11 21:00:57 +0000457 Py_ssize_t n_grouping_chars = 0; /* Count of additional chars to
458 allocate, used for 'n'
459 formatting. */
Eric Smith8c663262007-08-25 02:26:07 +0000460 NumberFieldWidths spec;
461 long x;
462
463 /* no precision allowed on integers */
464 if (format->precision != -1) {
465 PyErr_SetString(PyExc_ValueError,
466 "Precision not allowed in integer format specifier");
467 goto done;
468 }
469
470
471 /* special case for character formatting */
472 if (format->type == 'c') {
473 /* error to specify a sign */
474 if (format->sign != '\0') {
475 PyErr_SetString(PyExc_ValueError,
476 "Sign not allowed with integer"
477 " format specifier 'c'");
478 goto done;
479 }
480
481 /* taken from unicodeobject.c formatchar() */
482 /* Integer input truncated to a character */
Eric Smith8fd3eba2008-02-17 19:48:00 +0000483/* XXX: won't work for int */
Christian Heimes217cfd12007-12-02 14:31:20 +0000484 x = PyLong_AsLong(value);
Eric Smith8c663262007-08-25 02:26:07 +0000485 if (x == -1 && PyErr_Occurred())
486 goto done;
487#ifdef Py_UNICODE_WIDE
488 if (x < 0 || x > 0x10ffff) {
489 PyErr_SetString(PyExc_OverflowError,
490 "%c arg not in range(0x110000) "
491 "(wide Python build)");
492 goto done;
493 }
494#else
495 if (x < 0 || x > 0xffff) {
496 PyErr_SetString(PyExc_OverflowError,
497 "%c arg not in range(0x10000) "
498 "(narrow Python build)");
499 goto done;
500 }
501#endif
Eric Smith8fd3eba2008-02-17 19:48:00 +0000502 numeric_char = (STRINGLIB_CHAR)x;
503 pnumeric_chars = &numeric_char;
504 n_digits = 1;
Eric Smith0cb431c2007-08-28 01:07:27 +0000505 }
506 else {
Eric Smith8c663262007-08-25 02:26:07 +0000507 int base;
Eric Smith8fd3eba2008-02-17 19:48:00 +0000508 int leading_chars_to_skip; /* Number of characters added by
509 PyNumber_ToBase that we want to
510 skip over. */
511
512 /* Compute the base and how many characters will be added by
Eric Smith8c663262007-08-25 02:26:07 +0000513 PyNumber_ToBase */
514 switch (format->type) {
515 case 'b':
516 base = 2;
Eric Smith8fd3eba2008-02-17 19:48:00 +0000517 leading_chars_to_skip = 2; /* 0b */
Eric Smith8c663262007-08-25 02:26:07 +0000518 break;
519 case 'o':
520 base = 8;
Eric Smith8fd3eba2008-02-17 19:48:00 +0000521 leading_chars_to_skip = 2; /* 0o */
Eric Smith8c663262007-08-25 02:26:07 +0000522 break;
523 case 'x':
524 case 'X':
525 base = 16;
Eric Smith8fd3eba2008-02-17 19:48:00 +0000526 leading_chars_to_skip = 2; /* 0x */
Eric Smith8c663262007-08-25 02:26:07 +0000527 break;
528 default: /* shouldn't be needed, but stops a compiler warning */
529 case 'd':
Eric Smith5807c412008-05-11 21:00:57 +0000530 case 'n':
Eric Smith8c663262007-08-25 02:26:07 +0000531 base = 10;
Eric Smith8fd3eba2008-02-17 19:48:00 +0000532 leading_chars_to_skip = 0;
Eric Smith8c663262007-08-25 02:26:07 +0000533 break;
534 }
535
Eric Smith8fd3eba2008-02-17 19:48:00 +0000536 /* Do the hard part, converting to a string in a given base */
537 tmp = tostring(value, base);
538 if (tmp == NULL)
Eric Smith8c663262007-08-25 02:26:07 +0000539 goto done;
540
Eric Smith8fd3eba2008-02-17 19:48:00 +0000541 pnumeric_chars = STRINGLIB_STR(tmp);
542 n_digits = STRINGLIB_LEN(tmp);
Eric Smith8c663262007-08-25 02:26:07 +0000543
Eric Smith8fd3eba2008-02-17 19:48:00 +0000544 /* Remember not to modify what pnumeric_chars points to. it
545 might be interned. Only modify it after we copy it into a
546 newly allocated output buffer. */
Eric Smith8c663262007-08-25 02:26:07 +0000547
Eric Smith8fd3eba2008-02-17 19:48:00 +0000548 /* Is a sign character present in the output? If so, remember it
Eric Smith8c663262007-08-25 02:26:07 +0000549 and skip it */
Eric Smith8fd3eba2008-02-17 19:48:00 +0000550 sign = pnumeric_chars[0];
Eric Smith8c663262007-08-25 02:26:07 +0000551 if (sign == '-') {
Eric Smith8fd3eba2008-02-17 19:48:00 +0000552 ++leading_chars_to_skip;
Eric Smith8c663262007-08-25 02:26:07 +0000553 }
554
Eric Smith8fd3eba2008-02-17 19:48:00 +0000555 /* Skip over the leading chars (0x, 0b, etc.) */
556 n_digits -= leading_chars_to_skip;
557 pnumeric_chars += leading_chars_to_skip;
Eric Smith8c663262007-08-25 02:26:07 +0000558 }
559
Eric Smith5807c412008-05-11 21:00:57 +0000560 if (format->type == 'n')
561 /* Compute how many additional chars we need to allocate
562 to hold the thousands grouping. */
Eric Smith6d7e7a72008-06-24 01:06:47 +0000563 STRINGLIB_GROUPING(NULL, n_digits, n_digits,
Eric Smith5807c412008-05-11 21:00:57 +0000564 0, &n_grouping_chars, 0);
565
Eric Smithb151a452008-06-24 11:21:04 +0000566 /* Calculate the widths of the various leading and trailing parts */
567 calc_number_widths(&spec, sign, n_digits + n_grouping_chars, format);
568
Eric Smith8fd3eba2008-02-17 19:48:00 +0000569 /* Allocate a new string to hold the result */
Eric Smithb151a452008-06-24 11:21:04 +0000570 result = STRINGLIB_NEW(NULL, spec.n_total);
Eric Smith8fd3eba2008-02-17 19:48:00 +0000571 if (!result)
572 goto done;
573 p = STRINGLIB_STR(result);
Eric Smith8c663262007-08-25 02:26:07 +0000574
Eric Smith8fd3eba2008-02-17 19:48:00 +0000575 /* Fill in the digit parts */
576 n_leading_chars = spec.n_lpadding + spec.n_lsign + spec.n_spadding;
577 memmove(p + n_leading_chars,
578 pnumeric_chars,
579 n_digits * sizeof(STRINGLIB_CHAR));
580
Eric Smith5807c412008-05-11 21:00:57 +0000581 /* If type is 'X', convert to uppercase */
Eric Smith8fd3eba2008-02-17 19:48:00 +0000582 if (format->type == 'X') {
583 Py_ssize_t t;
Christian Heimesc3f30c42008-02-22 16:37:40 +0000584 for (t = 0; t < n_digits; ++t)
Eric Smith8fd3eba2008-02-17 19:48:00 +0000585 p[t + n_leading_chars] = STRINGLIB_TOUPPER(p[t + n_leading_chars]);
Eric Smith8c663262007-08-25 02:26:07 +0000586 }
587
Eric Smith5807c412008-05-11 21:00:57 +0000588 /* Insert the grouping, if any, after the uppercasing of 'X', so we can
Eric Smith6ed16dc2008-06-24 06:07:03 +0000589 ensure that grouping chars won't be affected. */
Eric Smithb151a452008-06-24 11:21:04 +0000590 if (n_grouping_chars) {
Eric Smith5807c412008-05-11 21:00:57 +0000591 /* We know this can't fail, since we've already
592 reserved enough space. */
593 STRINGLIB_CHAR *pstart = p + n_leading_chars;
Eric Smith6d7e7a72008-06-24 01:06:47 +0000594 int r = STRINGLIB_GROUPING(pstart, n_digits, n_digits,
Eric Smith5807c412008-05-11 21:00:57 +0000595 spec.n_total+n_grouping_chars-n_leading_chars,
596 NULL, 0);
597 assert(r);
598 }
599
Eric Smithb151a452008-06-24 11:21:04 +0000600 /* Fill in the non-digit parts (padding, sign, etc.) */
601 fill_non_digits(p, &spec, n_digits + n_grouping_chars,
602 format->fill_char == '\0' ? ' ' : format->fill_char);
Eric Smith8c663262007-08-25 02:26:07 +0000603
Eric Smith8c663262007-08-25 02:26:07 +0000604done:
Eric Smith8fd3eba2008-02-17 19:48:00 +0000605 Py_XDECREF(tmp);
Eric Smith8c663262007-08-25 02:26:07 +0000606 return result;
607}
Eric Smith8fd3eba2008-02-17 19:48:00 +0000608#endif /* defined FORMAT_LONG || defined FORMAT_INT */
Eric Smith8c663262007-08-25 02:26:07 +0000609
610/************************************************************************/
611/*********** float formatting *******************************************/
612/************************************************************************/
613
Eric Smith8fd3eba2008-02-17 19:48:00 +0000614#ifdef FORMAT_FLOAT
615#if STRINGLIB_IS_UNICODE
Eric Smith8c663262007-08-25 02:26:07 +0000616/* taken from unicodeobject.c */
617static Py_ssize_t
618strtounicode(Py_UNICODE *buffer, const char *charbuffer)
619{
620 register Py_ssize_t i;
621 Py_ssize_t len = strlen(charbuffer);
Christian Heimesc3f30c42008-02-22 16:37:40 +0000622 for (i = len - 1; i >= 0; --i)
Eric Smith185e30c2007-08-30 22:23:08 +0000623 buffer[i] = (Py_UNICODE) charbuffer[i];
Eric Smith8c663262007-08-25 02:26:07 +0000624
625 return len;
626}
Eric Smith8fd3eba2008-02-17 19:48:00 +0000627#endif
Eric Smith8c663262007-08-25 02:26:07 +0000628
Eric Smith8c663262007-08-25 02:26:07 +0000629/* see FORMATBUFLEN in unicodeobject.c */
630#define FLOAT_FORMATBUFLEN 120
631
632/* much of this is taken from unicodeobject.c */
Eric Smith8c663262007-08-25 02:26:07 +0000633static PyObject *
Christian Heimesc3f30c42008-02-22 16:37:40 +0000634format_float_internal(PyObject *value,
635 const InternalFormatSpec *format)
Eric Smith8c663262007-08-25 02:26:07 +0000636{
637 /* fmt = '%.' + `prec` + `type` + '%%'
638 worst case length = 2 + 10 (len of INT_MAX) + 1 + 2 = 15 (use 20)*/
639 char fmt[20];
640
641 /* taken from unicodeobject.c */
642 /* Worst case length calc to ensure no buffer overrun:
643
644 'g' formats:
Eric Smith185e30c2007-08-30 22:23:08 +0000645 fmt = %#.<prec>g
646 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
647 for any double rep.)
648 len = 1 + prec + 1 + 2 + 5 = 9 + prec
Eric Smith8c663262007-08-25 02:26:07 +0000649
650 'f' formats:
Eric Smith185e30c2007-08-30 22:23:08 +0000651 buf = '-' + [0-9]*x + '.' + [0-9]*prec (with x < 50)
652 len = 1 + 50 + 1 + prec = 52 + prec
Eric Smith8c663262007-08-25 02:26:07 +0000653
654 If prec=0 the effective precision is 1 (the leading digit is
655 always given), therefore increase the length by one.
656
657 */
658 char charbuf[FLOAT_FORMATBUFLEN];
659 Py_ssize_t n_digits;
660 double x;
661 Py_ssize_t precision = format->precision;
662 PyObject *result = NULL;
663 STRINGLIB_CHAR sign;
664 char* trailing = "";
665 STRINGLIB_CHAR *p;
666 NumberFieldWidths spec;
Christian Heimesc3f30c42008-02-22 16:37:40 +0000667 STRINGLIB_CHAR type = format->type;
Eric Smith8c663262007-08-25 02:26:07 +0000668
669#if STRINGLIB_IS_UNICODE
670 Py_UNICODE unicodebuf[FLOAT_FORMATBUFLEN];
671#endif
672
673 /* first, do the conversion as 8-bit chars, using the platform's
674 snprintf. then, if needed, convert to unicode. */
675
676 /* 'F' is the same as 'f', per the PEP */
677 if (type == 'F')
678 type = 'f';
679
680 x = PyFloat_AsDouble(value);
681
682 if (x == -1.0 && PyErr_Occurred())
Eric Smith185e30c2007-08-30 22:23:08 +0000683 goto done;
Eric Smith8c663262007-08-25 02:26:07 +0000684
685 if (type == '%') {
686 type = 'f';
687 x *= 100;
688 trailing = "%";
689 }
690
691 if (precision < 0)
Eric Smith185e30c2007-08-30 22:23:08 +0000692 precision = 6;
Eric Smith8c663262007-08-25 02:26:07 +0000693 if (type == 'f' && (fabs(x) / 1e25) >= 1e25)
Eric Smith185e30c2007-08-30 22:23:08 +0000694 type = 'g';
Eric Smith8c663262007-08-25 02:26:07 +0000695
696 /* cast "type", because if we're in unicode we need to pass a
697 8-bit char. this is safe, because we've restricted what "type"
698 can be */
Eric Smith8fd3eba2008-02-17 19:48:00 +0000699 PyOS_snprintf(fmt, sizeof(fmt), "%%.%" PY_FORMAT_SIZE_T "d%c", precision,
700 (char)type);
Eric Smith8c663262007-08-25 02:26:07 +0000701
Christian Heimesc3f30c42008-02-22 16:37:40 +0000702 /* do the actual formatting */
703 PyOS_ascii_formatd(charbuf, sizeof(charbuf), fmt, x);
Eric Smith8c663262007-08-25 02:26:07 +0000704
705 /* adding trailing to fmt with PyOS_snprintf doesn't work, not
706 sure why. we'll just concatentate it here, no harm done. we
707 know we can't have a buffer overflow from the fmt size
708 analysis */
709 strcat(charbuf, trailing);
710
711 /* rather than duplicate the code for snprintf for both unicode
712 and 8 bit strings, we just use the 8 bit version and then
713 convert to unicode in a separate code path. that's probably
714 the lesser of 2 evils. */
715#if STRINGLIB_IS_UNICODE
716 n_digits = strtounicode(unicodebuf, charbuf);
717 p = unicodebuf;
718#else
719 /* compute the length. I believe this is done because the return
720 value from snprintf above is unreliable */
721 n_digits = strlen(charbuf);
722 p = charbuf;
723#endif
724
725 /* is a sign character present in the output? if so, remember it
726 and skip it */
727 sign = p[0];
728 if (sign == '-') {
Christian Heimesc3f30c42008-02-22 16:37:40 +0000729 ++p;
730 --n_digits;
Eric Smith8c663262007-08-25 02:26:07 +0000731 }
732
733 calc_number_widths(&spec, sign, n_digits, format);
734
735 /* allocate a string with enough space */
736 result = STRINGLIB_NEW(NULL, spec.n_total);
737 if (result == NULL)
738 goto done;
739
Eric Smithb151a452008-06-24 11:21:04 +0000740 /* Fill in the non-digit parts (padding, sign, etc.) */
741 fill_non_digits(STRINGLIB_STR(result), &spec, n_digits,
742 format->fill_char == '\0' ? ' ' : format->fill_char);
Eric Smith8c663262007-08-25 02:26:07 +0000743
744 /* fill in the digit parts */
Eric Smith8fd3eba2008-02-17 19:48:00 +0000745 memmove(STRINGLIB_STR(result) +
746 (spec.n_lpadding + spec.n_lsign + spec.n_spadding),
Eric Smith8c663262007-08-25 02:26:07 +0000747 p,
748 n_digits * sizeof(STRINGLIB_CHAR));
749
750done:
751 return result;
752}
Eric Smith8fd3eba2008-02-17 19:48:00 +0000753#endif /* FORMAT_FLOAT */
Eric Smith8c663262007-08-25 02:26:07 +0000754
755/************************************************************************/
756/*********** built in formatters ****************************************/
757/************************************************************************/
Eric Smith8c663262007-08-25 02:26:07 +0000758PyObject *
Eric Smith4a7d76d2008-05-30 18:10:19 +0000759FORMAT_STRING(PyObject *obj,
760 STRINGLIB_CHAR *format_spec,
761 Py_ssize_t format_spec_len)
Eric Smith8c663262007-08-25 02:26:07 +0000762{
Eric Smith8c663262007-08-25 02:26:07 +0000763 InternalFormatSpec format;
Eric Smith4a7d76d2008-05-30 18:10:19 +0000764 PyObject *result = NULL;
Eric Smith8c663262007-08-25 02:26:07 +0000765
766 /* check for the special case of zero length format spec, make
Eric Smith4a7d76d2008-05-30 18:10:19 +0000767 it equivalent to str(obj) */
768 if (format_spec_len == 0) {
769 result = STRINGLIB_TOSTR(obj);
Eric Smith8c663262007-08-25 02:26:07 +0000770 goto done;
771 }
772
773 /* parse the format_spec */
Eric Smith4a7d76d2008-05-30 18:10:19 +0000774 if (!parse_internal_render_format_spec(format_spec, format_spec_len,
775 &format, 's'))
Eric Smith8c663262007-08-25 02:26:07 +0000776 goto done;
777
778 /* type conversion? */
779 switch (format.type) {
780 case 's':
781 /* no type conversion needed, already a string. do the formatting */
Eric Smith4a7d76d2008-05-30 18:10:19 +0000782 result = format_string_internal(obj, &format);
Eric Smith8c663262007-08-25 02:26:07 +0000783 break;
Eric Smith8c663262007-08-25 02:26:07 +0000784 default:
785 /* unknown */
Martin v. Löwis5a6f4582008-04-07 03:22:07 +0000786 #if STRINGLIB_IS_UNICODE
787 /* If STRINGLIB_CHAR is Py_UNICODE, %c might be out-of-range,
788 hence the two cases. If it is char, gcc complains that the
789 condition below is always true, hence the ifdef. */
790 if (format.type > 32 && format.type <128)
791 #endif
792 PyErr_Format(PyExc_ValueError, "Unknown conversion type %c",
793 (char)format.type);
794 #if STRINGLIB_IS_UNICODE
795 else
796 PyErr_Format(PyExc_ValueError, "Unknown conversion type '\\x%x'",
797 (unsigned int)format.type);
798 #endif
Eric Smith8c663262007-08-25 02:26:07 +0000799 goto done;
800 }
801
802done:
Eric Smith8c663262007-08-25 02:26:07 +0000803 return result;
804}
805
Eric Smith8fd3eba2008-02-17 19:48:00 +0000806#if defined FORMAT_LONG || defined FORMAT_INT
807static PyObject*
Eric Smith4a7d76d2008-05-30 18:10:19 +0000808format_int_or_long(PyObject* obj,
809 STRINGLIB_CHAR *format_spec,
810 Py_ssize_t format_spec_len,
811 IntOrLongToString tostring)
Eric Smith8c663262007-08-25 02:26:07 +0000812{
Eric Smith8c663262007-08-25 02:26:07 +0000813 PyObject *result = NULL;
814 PyObject *tmp = NULL;
815 InternalFormatSpec format;
816
Eric Smith8c663262007-08-25 02:26:07 +0000817 /* check for the special case of zero length format spec, make
Eric Smith4a7d76d2008-05-30 18:10:19 +0000818 it equivalent to str(obj) */
819 if (format_spec_len == 0) {
820 result = STRINGLIB_TOSTR(obj);
Eric Smith8c663262007-08-25 02:26:07 +0000821 goto done;
822 }
823
824 /* parse the format_spec */
Eric Smith4a7d76d2008-05-30 18:10:19 +0000825 if (!parse_internal_render_format_spec(format_spec,
826 format_spec_len,
827 &format, 'd'))
Eric Smith8c663262007-08-25 02:26:07 +0000828 goto done;
829
830 /* type conversion? */
831 switch (format.type) {
Eric Smith8c663262007-08-25 02:26:07 +0000832 case 'b':
833 case 'c':
834 case 'd':
835 case 'o':
836 case 'x':
837 case 'X':
Eric Smith5807c412008-05-11 21:00:57 +0000838 case 'n':
Eric Smith8fd3eba2008-02-17 19:48:00 +0000839 /* no type conversion needed, already an int (or long). do
840 the formatting */
Eric Smith4a7d76d2008-05-30 18:10:19 +0000841 result = format_int_or_long_internal(obj, &format, tostring);
Eric Smith8c663262007-08-25 02:26:07 +0000842 break;
843
Eric Smithfa767ef2008-01-28 10:59:27 +0000844 case 'e':
845 case 'E':
846 case 'f':
847 case 'F':
848 case 'g':
849 case 'G':
Eric Smithfa767ef2008-01-28 10:59:27 +0000850 case '%':
851 /* convert to float */
Eric Smith4a7d76d2008-05-30 18:10:19 +0000852 tmp = PyNumber_Float(obj);
Eric Smithfa767ef2008-01-28 10:59:27 +0000853 if (tmp == NULL)
854 goto done;
Eric Smith4a7d76d2008-05-30 18:10:19 +0000855 result = format_float_internal(obj, &format);
Eric Smithfa767ef2008-01-28 10:59:27 +0000856 break;
857
Eric Smith8c663262007-08-25 02:26:07 +0000858 default:
859 /* unknown */
860 PyErr_Format(PyExc_ValueError, "Unknown conversion type %c",
861 format.type);
862 goto done;
863 }
864
865done:
866 Py_XDECREF(tmp);
867 return result;
868}
Eric Smith8fd3eba2008-02-17 19:48:00 +0000869#endif /* FORMAT_LONG || defined FORMAT_INT */
Eric Smith8c663262007-08-25 02:26:07 +0000870
Eric Smith8fd3eba2008-02-17 19:48:00 +0000871#ifdef FORMAT_LONG
872/* Need to define long_format as a function that will convert a long
873 to a string. In 3.0, _PyLong_Format has the correct signature. In
874 2.x, we need to fudge a few parameters */
875#if PY_VERSION_HEX >= 0x03000000
876#define long_format _PyLong_Format
877#else
878static PyObject*
879long_format(PyObject* value, int base)
880{
881 /* Convert to base, don't add trailing 'L', and use the new octal
882 format. We already know this is a long object */
883 assert(PyLong_Check(value));
884 /* convert to base, don't add 'L', and use the new octal format */
885 return _PyLong_Format(value, base, 0, 1);
886}
887#endif
888
889PyObject *
Eric Smith4a7d76d2008-05-30 18:10:19 +0000890FORMAT_LONG(PyObject *obj,
891 STRINGLIB_CHAR *format_spec,
892 Py_ssize_t format_spec_len)
Eric Smith8fd3eba2008-02-17 19:48:00 +0000893{
Eric Smith4a7d76d2008-05-30 18:10:19 +0000894 return format_int_or_long(obj, format_spec, format_spec_len,
895 long_format);
Eric Smith8fd3eba2008-02-17 19:48:00 +0000896}
897#endif /* FORMAT_LONG */
898
899#ifdef FORMAT_INT
900/* this is only used for 2.x, not 3.0 */
901static PyObject*
902int_format(PyObject* value, int base)
903{
904 /* Convert to base, and use the new octal format. We already
905 know this is an int object */
906 assert(PyInt_Check(value));
907 return _PyInt_Format((PyIntObject*)value, base, 1);
908}
909
910PyObject *
Eric Smith4a7d76d2008-05-30 18:10:19 +0000911FORMAT_INT(PyObject *obj,
912 STRINGLIB_CHAR *format_spec,
913 Py_ssize_t format_spec_len)
Eric Smith8fd3eba2008-02-17 19:48:00 +0000914{
Eric Smith4a7d76d2008-05-30 18:10:19 +0000915 return format_int_or_long(obj, format_spec, format_spec_len,
916 int_format);
Eric Smith8fd3eba2008-02-17 19:48:00 +0000917}
918#endif /* FORMAT_INT */
919
920#ifdef FORMAT_FLOAT
Eric Smith8c663262007-08-25 02:26:07 +0000921PyObject *
Eric Smith4a7d76d2008-05-30 18:10:19 +0000922FORMAT_FLOAT(PyObject *obj,
923 STRINGLIB_CHAR *format_spec,
924 Py_ssize_t format_spec_len)
Eric Smith8c663262007-08-25 02:26:07 +0000925{
Eric Smith8c663262007-08-25 02:26:07 +0000926 PyObject *result = NULL;
Eric Smith8c663262007-08-25 02:26:07 +0000927 InternalFormatSpec format;
928
Eric Smith8c663262007-08-25 02:26:07 +0000929 /* check for the special case of zero length format spec, make
Eric Smith4a7d76d2008-05-30 18:10:19 +0000930 it equivalent to str(obj) */
931 if (format_spec_len == 0) {
932 result = STRINGLIB_TOSTR(obj);
Eric Smith8c663262007-08-25 02:26:07 +0000933 goto done;
934 }
935
936 /* parse the format_spec */
Eric Smith4a7d76d2008-05-30 18:10:19 +0000937 if (!parse_internal_render_format_spec(format_spec,
938 format_spec_len,
939 &format, '\0'))
Eric Smith8c663262007-08-25 02:26:07 +0000940 goto done;
941
942 /* type conversion? */
943 switch (format.type) {
Christian Heimesb186d002008-03-18 15:15:01 +0000944 case '\0':
945 /* 'Z' means like 'g', but with at least one decimal. See
946 PyOS_ascii_formatd */
947 format.type = 'Z';
948 /* Deliberate fall through to the next case statement */
Eric Smith8c663262007-08-25 02:26:07 +0000949 case 'e':
950 case 'E':
951 case 'f':
952 case 'F':
953 case 'g':
954 case 'G':
955 case 'n':
956 case '%':
957 /* no conversion, already a float. do the formatting */
Eric Smith4a7d76d2008-05-30 18:10:19 +0000958 result = format_float_internal(obj, &format);
Eric Smith8c663262007-08-25 02:26:07 +0000959 break;
960
961 default:
962 /* unknown */
963 PyErr_Format(PyExc_ValueError, "Unknown conversion type %c",
964 format.type);
965 goto done;
966 }
967
968done:
Eric Smith8c663262007-08-25 02:26:07 +0000969 return result;
970}
Eric Smith8fd3eba2008-02-17 19:48:00 +0000971#endif /* FORMAT_FLOAT */