/* Implements the string, long, and float formatters; that is,
   string.__format__, etc. */
3
/* Before including this file, you must include either:
     stringlib/unicodedefs.h
     stringlib/stringdefs.h

   Also, you should define the names:
     FORMAT_STRING
     FORMAT_LONG
     FORMAT_FLOAT
   to be whatever you want the public names of these functions to
   be.  These are the only non-static functions defined here.
*/
15
/* When non-zero, '(' is accepted as a sign specifier and negative
   numbers are rendered wrapped in parentheses.  Disabled by default. */
#define ALLOW_PARENS_FOR_SIGN 0
17
Eric Smith8c663262007-08-25 02:26:07 +000018/*
19 get_integer consumes 0 or more decimal digit characters from an
20 input string, updates *result with the corresponding positive
21 integer, and returns the number of digits consumed.
22
23 returns -1 on error.
24*/
25static int
26get_integer(STRINGLIB_CHAR **ptr, STRINGLIB_CHAR *end,
27 Py_ssize_t *result)
28{
29 Py_ssize_t accumulator, digitval, oldaccumulator;
30 int numdigits;
31 accumulator = numdigits = 0;
32 for (;;(*ptr)++, numdigits++) {
33 if (*ptr >= end)
34 break;
35 digitval = STRINGLIB_TODECIMAL(**ptr);
36 if (digitval < 0)
37 break;
38 /*
39 This trick was copied from old Unicode format code. It's cute,
40 but would really suck on an old machine with a slow divide
41 implementation. Fortunately, in the normal case we do not
42 expect too many digits.
43 */
44 oldaccumulator = accumulator;
45 accumulator *= 10;
46 if ((accumulator+10)/10 != oldaccumulator+1) {
47 PyErr_Format(PyExc_ValueError,
48 "Too many decimal digits in format string");
49 return -1;
50 }
51 accumulator += digitval;
52 }
53 *result = accumulator;
54 return numdigits;
55}
56
/************************************************************************/
/*********** standard format specifier parsing **************************/
/************************************************************************/
60
61/* returns true if this character is a specifier alignment token */
62Py_LOCAL_INLINE(int)
63is_alignment_token(STRINGLIB_CHAR c)
64{
65 switch (c) {
66 case '<': case '>': case '=': case '^':
67 return 1;
68 default:
69 return 0;
70 }
71}
72
73/* returns true if this character is a sign element */
74Py_LOCAL_INLINE(int)
75is_sign_element(STRINGLIB_CHAR c)
76{
77 switch (c) {
Eric Smithb7f5ba12007-08-29 12:38:45 +000078 case ' ': case '+': case '-':
Eric Smith44300952007-08-29 12:43:12 +000079#if ALLOW_PARENS_FOR_SIGN
Eric Smithb7f5ba12007-08-29 12:38:45 +000080 case '(':
Eric Smith44300952007-08-29 12:43:12 +000081#endif
Eric Smith8c663262007-08-25 02:26:07 +000082 return 1;
83 default:
84 return 0;
85 }
86}
87
88
89typedef struct {
90 STRINGLIB_CHAR fill_char;
91 STRINGLIB_CHAR align;
Eric Smithb1ebcc62008-07-15 13:02:41 +000092 int alternate;
Eric Smith8c663262007-08-25 02:26:07 +000093 STRINGLIB_CHAR sign;
94 Py_ssize_t width;
95 Py_ssize_t precision;
96 STRINGLIB_CHAR type;
97} InternalFormatSpec;
98
99/*
100 ptr points to the start of the format_spec, end points just past its end.
101 fills in format with the parsed information.
102 returns 1 on success, 0 on failure.
103 if failure, sets the exception
104*/
105static int
Eric Smith4a7d76d2008-05-30 18:10:19 +0000106parse_internal_render_format_spec(STRINGLIB_CHAR *format_spec,
107 Py_ssize_t format_spec_len,
Eric Smith8c663262007-08-25 02:26:07 +0000108 InternalFormatSpec *format,
109 char default_type)
110{
Eric Smith4a7d76d2008-05-30 18:10:19 +0000111 STRINGLIB_CHAR *ptr = format_spec;
112 STRINGLIB_CHAR *end = format_spec + format_spec_len;
Eric Smith8c663262007-08-25 02:26:07 +0000113
114 /* end-ptr is used throughout this code to specify the length of
115 the input string */
116
117 Py_ssize_t specified_width;
118
119 format->fill_char = '\0';
120 format->align = '\0';
Eric Smithb1ebcc62008-07-15 13:02:41 +0000121 format->alternate = 0;
Eric Smith8c663262007-08-25 02:26:07 +0000122 format->sign = '\0';
123 format->width = -1;
124 format->precision = -1;
125 format->type = default_type;
126
127 /* If the second char is an alignment token,
128 then parse the fill char */
129 if (end-ptr >= 2 && is_alignment_token(ptr[1])) {
130 format->align = ptr[1];
131 format->fill_char = ptr[0];
132 ptr += 2;
Eric Smith0cb431c2007-08-28 01:07:27 +0000133 }
134 else if (end-ptr >= 1 && is_alignment_token(ptr[0])) {
Eric Smith8c663262007-08-25 02:26:07 +0000135 format->align = ptr[0];
Christian Heimesc3f30c42008-02-22 16:37:40 +0000136 ++ptr;
Eric Smith8c663262007-08-25 02:26:07 +0000137 }
138
139 /* Parse the various sign options */
140 if (end-ptr >= 1 && is_sign_element(ptr[0])) {
141 format->sign = ptr[0];
Christian Heimesc3f30c42008-02-22 16:37:40 +0000142 ++ptr;
Eric Smithb7f5ba12007-08-29 12:38:45 +0000143#if ALLOW_PARENS_FOR_SIGN
Eric Smith8c663262007-08-25 02:26:07 +0000144 if (end-ptr >= 1 && ptr[0] == ')') {
Christian Heimesc3f30c42008-02-22 16:37:40 +0000145 ++ptr;
Eric Smith8c663262007-08-25 02:26:07 +0000146 }
Eric Smithb7f5ba12007-08-29 12:38:45 +0000147#endif
Eric Smith8c663262007-08-25 02:26:07 +0000148 }
149
150 /* The special case for 0-padding (backwards compat) */
Eric Smith185e30c2007-08-30 22:23:08 +0000151 if (format->fill_char == '\0' && end-ptr >= 1 && ptr[0] == '0') {
Eric Smith8c663262007-08-25 02:26:07 +0000152 format->fill_char = '0';
153 if (format->align == '\0') {
154 format->align = '=';
155 }
Christian Heimesc3f30c42008-02-22 16:37:40 +0000156 ++ptr;
Eric Smith8c663262007-08-25 02:26:07 +0000157 }
158
Eric Smithb1ebcc62008-07-15 13:02:41 +0000159 /* If the next character is #, we're in alternate mode. This only
160 applies to integers. */
161 if (end-ptr >= 1 && ptr[0] == '#') {
162 format->alternate = 1;
163 ++ptr;
164 }
165
Eric Smith8c663262007-08-25 02:26:07 +0000166 /* XXX add error checking */
167 specified_width = get_integer(&ptr, end, &format->width);
168
169 /* if specified_width is 0, we didn't consume any characters for
170 the width. in that case, reset the width to -1, because
171 get_integer() will have set it to zero */
172 if (specified_width == 0) {
173 format->width = -1;
174 }
175
176 /* Parse field precision */
177 if (end-ptr && ptr[0] == '.') {
Christian Heimesc3f30c42008-02-22 16:37:40 +0000178 ++ptr;
Eric Smith8c663262007-08-25 02:26:07 +0000179
180 /* XXX add error checking */
181 specified_width = get_integer(&ptr, end, &format->precision);
182
183 /* not having a precision after a dot is an error */
184 if (specified_width == 0) {
185 PyErr_Format(PyExc_ValueError,
186 "Format specifier missing precision");
187 return 0;
188 }
189
190 }
191
192 /* Finally, parse the type field */
193
194 if (end-ptr > 1) {
195 /* invalid conversion spec */
196 PyErr_Format(PyExc_ValueError, "Invalid conversion specification");
197 return 0;
198 }
199
200 if (end-ptr == 1) {
201 format->type = ptr[0];
Christian Heimesc3f30c42008-02-22 16:37:40 +0000202 ++ptr;
Eric Smith8c663262007-08-25 02:26:07 +0000203 }
204
205 return 1;
206}
207
Eric Smith8fd3eba2008-02-17 19:48:00 +0000208#if defined FORMAT_FLOAT || defined FORMAT_LONG
/************************************************************************/
/*********** common routines for numeric formatting *********************/
/************************************************************************/
212
213/* describes the layout for an integer, see the comment in
214 _calc_integer_widths() for details */
215typedef struct {
216 Py_ssize_t n_lpadding;
217 Py_ssize_t n_spadding;
218 Py_ssize_t n_rpadding;
219 char lsign;
220 Py_ssize_t n_lsign;
221 char rsign;
222 Py_ssize_t n_rsign;
223 Py_ssize_t n_total; /* just a convenience, it's derivable from the
224 other fields */
225} NumberFieldWidths;
226
227/* not all fields of format are used. for example, precision is
228 unused. should this take discrete params in order to be more clear
229 about what it does? or is passing a single format parameter easier
230 and more efficient enough to justify a little obfuscation? */
231static void
232calc_number_widths(NumberFieldWidths *r, STRINGLIB_CHAR actual_sign,
Eric Smithb1ebcc62008-07-15 13:02:41 +0000233 Py_ssize_t n_prefix, Py_ssize_t n_digits,
234 const InternalFormatSpec *format)
Eric Smith8c663262007-08-25 02:26:07 +0000235{
236 r->n_lpadding = 0;
237 r->n_spadding = 0;
238 r->n_rpadding = 0;
239 r->lsign = '\0';
240 r->n_lsign = 0;
241 r->rsign = '\0';
242 r->n_rsign = 0;
243
244 /* the output will look like:
Eric Smithb1ebcc62008-07-15 13:02:41 +0000245 | |
246 | <lpadding> <lsign> <prefix> <spadding> <digits> <rsign> <rpadding> |
247 | |
Eric Smith8c663262007-08-25 02:26:07 +0000248
249 lsign and rsign are computed from format->sign and the actual
250 sign of the number
251
Eric Smithb1ebcc62008-07-15 13:02:41 +0000252 prefix is given (it's for the '0x' prefix)
253
Eric Smith8c663262007-08-25 02:26:07 +0000254 digits is already known
255
256 the total width is either given, or computed from the
257 actual digits
258
259 only one of lpadding, spadding, and rpadding can be non-zero,
260 and it's calculated from the width and other fields
261 */
262
263 /* compute the various parts we're going to write */
264 if (format->sign == '+') {
265 /* always put a + or - */
266 r->n_lsign = 1;
267 r->lsign = (actual_sign == '-' ? '-' : '+');
Eric Smith0cb431c2007-08-28 01:07:27 +0000268 }
Eric Smithb7f5ba12007-08-29 12:38:45 +0000269#if ALLOW_PARENS_FOR_SIGN
Eric Smith0cb431c2007-08-28 01:07:27 +0000270 else if (format->sign == '(') {
Eric Smith8c663262007-08-25 02:26:07 +0000271 if (actual_sign == '-') {
272 r->n_lsign = 1;
273 r->lsign = '(';
274 r->n_rsign = 1;
275 r->rsign = ')';
276 }
Eric Smith0cb431c2007-08-28 01:07:27 +0000277 }
Eric Smithb7f5ba12007-08-29 12:38:45 +0000278#endif
Eric Smith0cb431c2007-08-28 01:07:27 +0000279 else if (format->sign == ' ') {
Eric Smith8c663262007-08-25 02:26:07 +0000280 r->n_lsign = 1;
281 r->lsign = (actual_sign == '-' ? '-' : ' ');
Eric Smith0cb431c2007-08-28 01:07:27 +0000282 }
283 else {
Eric Smith8c663262007-08-25 02:26:07 +0000284 /* non specified, or the default (-) */
285 if (actual_sign == '-') {
286 r->n_lsign = 1;
287 r->lsign = '-';
288 }
289 }
290
291 /* now the number of padding characters */
292 if (format->width == -1) {
293 /* no padding at all, nothing to do */
Eric Smith0cb431c2007-08-28 01:07:27 +0000294 }
295 else {
Eric Smith8c663262007-08-25 02:26:07 +0000296 /* see if any padding is needed */
297 if (r->n_lsign + n_digits + r->n_rsign >= format->width) {
298 /* no padding needed, we're already bigger than the
299 requested width */
Eric Smith0cb431c2007-08-28 01:07:27 +0000300 }
301 else {
Eric Smith8c663262007-08-25 02:26:07 +0000302 /* determine which of left, space, or right padding is
303 needed */
Eric Smith8fd3eba2008-02-17 19:48:00 +0000304 Py_ssize_t padding = format->width -
305 (r->n_lsign + n_digits + r->n_rsign);
Eric Smith8c663262007-08-25 02:26:07 +0000306 if (format->align == '<')
307 r->n_rpadding = padding;
308 else if (format->align == '>')
309 r->n_lpadding = padding;
310 else if (format->align == '^') {
311 r->n_lpadding = padding / 2;
312 r->n_rpadding = padding - r->n_lpadding;
Eric Smith0cb431c2007-08-28 01:07:27 +0000313 }
Eric Smith185e30c2007-08-30 22:23:08 +0000314 else if (format->align == '=')
Eric Smith8c663262007-08-25 02:26:07 +0000315 r->n_spadding = padding;
Eric Smith185e30c2007-08-30 22:23:08 +0000316 else
317 r->n_lpadding = padding;
Eric Smith8c663262007-08-25 02:26:07 +0000318 }
319 }
320 r->n_total = r->n_lpadding + r->n_lsign + r->n_spadding +
321 n_digits + r->n_rsign + r->n_rpadding;
322}
323
324/* fill in the non-digit parts of a numbers's string representation,
325 as determined in _calc_integer_widths(). returns the pointer to
326 where the digits go. */
327static STRINGLIB_CHAR *
Eric Smithb151a452008-06-24 11:21:04 +0000328fill_non_digits(STRINGLIB_CHAR *p_buf, const NumberFieldWidths *spec,
329 Py_ssize_t n_digits, STRINGLIB_CHAR fill_char)
Eric Smith8c663262007-08-25 02:26:07 +0000330{
331 STRINGLIB_CHAR* p_digits;
332
333 if (spec->n_lpadding) {
334 STRINGLIB_FILL(p_buf, fill_char, spec->n_lpadding);
335 p_buf += spec->n_lpadding;
336 }
337 if (spec->n_lsign == 1) {
338 *p_buf++ = spec->lsign;
339 }
340 if (spec->n_spadding) {
341 STRINGLIB_FILL(p_buf, fill_char, spec->n_spadding);
342 p_buf += spec->n_spadding;
343 }
344 p_digits = p_buf;
345 p_buf += n_digits;
346 if (spec->n_rsign == 1) {
347 *p_buf++ = spec->rsign;
348 }
349 if (spec->n_rpadding) {
350 STRINGLIB_FILL(p_buf, fill_char, spec->n_rpadding);
351 p_buf += spec->n_rpadding;
352 }
353 return p_digits;
354}
Eric Smith8fd3eba2008-02-17 19:48:00 +0000355#endif /* FORMAT_FLOAT || FORMAT_LONG */
Eric Smith8c663262007-08-25 02:26:07 +0000356
/************************************************************************/
/*********** string formatting ******************************************/
/************************************************************************/
360
361static PyObject *
362format_string_internal(PyObject *value, const InternalFormatSpec *format)
363{
364 Py_ssize_t width; /* total field width */
365 Py_ssize_t lpad;
366 STRINGLIB_CHAR *dst;
367 STRINGLIB_CHAR *src = STRINGLIB_STR(value);
368 Py_ssize_t len = STRINGLIB_LEN(value);
369 PyObject *result = NULL;
370
371 /* sign is not allowed on strings */
372 if (format->sign != '\0') {
373 PyErr_SetString(PyExc_ValueError,
374 "Sign not allowed in string format specifier");
375 goto done;
376 }
377
Eric Smithb1ebcc62008-07-15 13:02:41 +0000378 /* alternate is not allowed on strings */
379 if (format->alternate) {
380 PyErr_SetString(PyExc_ValueError,
381 "Alternate form (#) not allowed in string format "
382 "specifier");
383 goto done;
384 }
385
Eric Smith8c663262007-08-25 02:26:07 +0000386 /* '=' alignment not allowed on strings */
387 if (format->align == '=') {
388 PyErr_SetString(PyExc_ValueError,
389 "'=' alignment not allowed "
390 "in string format specifier");
391 goto done;
392 }
393
394 /* if precision is specified, output no more that format.precision
395 characters */
396 if (format->precision >= 0 && len >= format->precision) {
397 len = format->precision;
398 }
399
400 if (format->width >= 0) {
401 width = format->width;
402
403 /* but use at least len characters */
404 if (len > width) {
405 width = len;
406 }
Eric Smith0cb431c2007-08-28 01:07:27 +0000407 }
408 else {
Eric Smith8c663262007-08-25 02:26:07 +0000409 /* not specified, use all of the chars and no more */
410 width = len;
411 }
412
413 /* allocate the resulting string */
414 result = STRINGLIB_NEW(NULL, width);
415 if (result == NULL)
416 goto done;
417
418 /* now write into that space */
419 dst = STRINGLIB_STR(result);
420
421 /* figure out how much leading space we need, based on the
422 aligning */
423 if (format->align == '>')
424 lpad = width - len;
425 else if (format->align == '^')
426 lpad = (width - len) / 2;
427 else
428 lpad = 0;
429
430 /* if right aligning, increment the destination allow space on the
431 left */
432 memcpy(dst + lpad, src, len * sizeof(STRINGLIB_CHAR));
433
434 /* do any padding */
435 if (width > len) {
436 STRINGLIB_CHAR fill_char = format->fill_char;
437 if (fill_char == '\0') {
438 /* use the default, if not specified */
439 fill_char = ' ';
440 }
441
442 /* pad on left */
443 if (lpad)
444 STRINGLIB_FILL(dst, fill_char, lpad);
445
446 /* pad on right */
447 if (width - len - lpad)
448 STRINGLIB_FILL(dst + len + lpad, fill_char, width - len - lpad);
449 }
450
451done:
452 return result;
453}
454
455
/************************************************************************/
/*********** long formatting ********************************************/
/************************************************************************/
459
Eric Smith8fd3eba2008-02-17 19:48:00 +0000460#if defined FORMAT_LONG || defined FORMAT_INT
461typedef PyObject*
462(*IntOrLongToString)(PyObject *value, int base);
463
Eric Smith8c663262007-08-25 02:26:07 +0000464static PyObject *
Eric Smith8fd3eba2008-02-17 19:48:00 +0000465format_int_or_long_internal(PyObject *value, const InternalFormatSpec *format,
466 IntOrLongToString tostring)
Eric Smith8c663262007-08-25 02:26:07 +0000467{
468 PyObject *result = NULL;
Eric Smith8fd3eba2008-02-17 19:48:00 +0000469 PyObject *tmp = NULL;
470 STRINGLIB_CHAR *pnumeric_chars;
471 STRINGLIB_CHAR numeric_char;
Eric Smith8c663262007-08-25 02:26:07 +0000472 STRINGLIB_CHAR sign = '\0';
473 STRINGLIB_CHAR *p;
474 Py_ssize_t n_digits; /* count of digits need from the computed
475 string */
Eric Smith8fd3eba2008-02-17 19:48:00 +0000476 Py_ssize_t n_leading_chars;
Eric Smith5807c412008-05-11 21:00:57 +0000477 Py_ssize_t n_grouping_chars = 0; /* Count of additional chars to
478 allocate, used for 'n'
479 formatting. */
Eric Smith8c663262007-08-25 02:26:07 +0000480 NumberFieldWidths spec;
481 long x;
482
483 /* no precision allowed on integers */
484 if (format->precision != -1) {
485 PyErr_SetString(PyExc_ValueError,
486 "Precision not allowed in integer format specifier");
487 goto done;
488 }
489
490
491 /* special case for character formatting */
492 if (format->type == 'c') {
493 /* error to specify a sign */
494 if (format->sign != '\0') {
495 PyErr_SetString(PyExc_ValueError,
496 "Sign not allowed with integer"
497 " format specifier 'c'");
498 goto done;
499 }
500
501 /* taken from unicodeobject.c formatchar() */
502 /* Integer input truncated to a character */
Eric Smith8fd3eba2008-02-17 19:48:00 +0000503/* XXX: won't work for int */
Christian Heimes217cfd12007-12-02 14:31:20 +0000504 x = PyLong_AsLong(value);
Eric Smith8c663262007-08-25 02:26:07 +0000505 if (x == -1 && PyErr_Occurred())
506 goto done;
507#ifdef Py_UNICODE_WIDE
508 if (x < 0 || x > 0x10ffff) {
509 PyErr_SetString(PyExc_OverflowError,
510 "%c arg not in range(0x110000) "
511 "(wide Python build)");
512 goto done;
513 }
514#else
515 if (x < 0 || x > 0xffff) {
516 PyErr_SetString(PyExc_OverflowError,
517 "%c arg not in range(0x10000) "
518 "(narrow Python build)");
519 goto done;
520 }
521#endif
Eric Smith8fd3eba2008-02-17 19:48:00 +0000522 numeric_char = (STRINGLIB_CHAR)x;
523 pnumeric_chars = &numeric_char;
524 n_digits = 1;
Eric Smith0cb431c2007-08-28 01:07:27 +0000525 }
526 else {
Eric Smith8c663262007-08-25 02:26:07 +0000527 int base;
Eric Smithb1ebcc62008-07-15 13:02:41 +0000528 int leading_chars_to_skip = 0; /* Number of characters added by
529 PyNumber_ToBase that we want to
530 skip over. */
Eric Smith8fd3eba2008-02-17 19:48:00 +0000531
532 /* Compute the base and how many characters will be added by
Eric Smith8c663262007-08-25 02:26:07 +0000533 PyNumber_ToBase */
534 switch (format->type) {
535 case 'b':
536 base = 2;
Eric Smithb1ebcc62008-07-15 13:02:41 +0000537 if (!format->alternate)
538 leading_chars_to_skip = 2; /* 0b */
Eric Smith8c663262007-08-25 02:26:07 +0000539 break;
540 case 'o':
541 base = 8;
Eric Smithb1ebcc62008-07-15 13:02:41 +0000542 if (!format->alternate)
543 leading_chars_to_skip = 2; /* 0o */
Eric Smith8c663262007-08-25 02:26:07 +0000544 break;
545 case 'x':
546 case 'X':
547 base = 16;
Eric Smithb1ebcc62008-07-15 13:02:41 +0000548 if (!format->alternate)
549 leading_chars_to_skip = 2; /* 0x */
Eric Smith8c663262007-08-25 02:26:07 +0000550 break;
551 default: /* shouldn't be needed, but stops a compiler warning */
552 case 'd':
Eric Smith5807c412008-05-11 21:00:57 +0000553 case 'n':
Eric Smith8c663262007-08-25 02:26:07 +0000554 base = 10;
Eric Smith8c663262007-08-25 02:26:07 +0000555 break;
556 }
557
Eric Smith8fd3eba2008-02-17 19:48:00 +0000558 /* Do the hard part, converting to a string in a given base */
559 tmp = tostring(value, base);
560 if (tmp == NULL)
Eric Smith8c663262007-08-25 02:26:07 +0000561 goto done;
562
Eric Smith8fd3eba2008-02-17 19:48:00 +0000563 pnumeric_chars = STRINGLIB_STR(tmp);
564 n_digits = STRINGLIB_LEN(tmp);
Eric Smith8c663262007-08-25 02:26:07 +0000565
Eric Smith8fd3eba2008-02-17 19:48:00 +0000566 /* Remember not to modify what pnumeric_chars points to. it
567 might be interned. Only modify it after we copy it into a
568 newly allocated output buffer. */
Eric Smith8c663262007-08-25 02:26:07 +0000569
Eric Smith8fd3eba2008-02-17 19:48:00 +0000570 /* Is a sign character present in the output? If so, remember it
Eric Smith8c663262007-08-25 02:26:07 +0000571 and skip it */
Eric Smith8fd3eba2008-02-17 19:48:00 +0000572 sign = pnumeric_chars[0];
Eric Smith8c663262007-08-25 02:26:07 +0000573 if (sign == '-') {
Eric Smith8fd3eba2008-02-17 19:48:00 +0000574 ++leading_chars_to_skip;
Eric Smith8c663262007-08-25 02:26:07 +0000575 }
576
Eric Smith8fd3eba2008-02-17 19:48:00 +0000577 /* Skip over the leading chars (0x, 0b, etc.) */
578 n_digits -= leading_chars_to_skip;
579 pnumeric_chars += leading_chars_to_skip;
Eric Smith8c663262007-08-25 02:26:07 +0000580 }
581
Eric Smith5807c412008-05-11 21:00:57 +0000582 if (format->type == 'n')
583 /* Compute how many additional chars we need to allocate
584 to hold the thousands grouping. */
Eric Smith6d7e7a72008-06-24 01:06:47 +0000585 STRINGLIB_GROUPING(NULL, n_digits, n_digits,
Eric Smith5807c412008-05-11 21:00:57 +0000586 0, &n_grouping_chars, 0);
587
Eric Smithb151a452008-06-24 11:21:04 +0000588 /* Calculate the widths of the various leading and trailing parts */
Eric Smithb1ebcc62008-07-15 13:02:41 +0000589 calc_number_widths(&spec, sign, 0, n_digits + n_grouping_chars, format);
Eric Smithb151a452008-06-24 11:21:04 +0000590
Eric Smith8fd3eba2008-02-17 19:48:00 +0000591 /* Allocate a new string to hold the result */
Eric Smithb151a452008-06-24 11:21:04 +0000592 result = STRINGLIB_NEW(NULL, spec.n_total);
Eric Smith8fd3eba2008-02-17 19:48:00 +0000593 if (!result)
594 goto done;
595 p = STRINGLIB_STR(result);
Eric Smith8c663262007-08-25 02:26:07 +0000596
Eric Smith8fd3eba2008-02-17 19:48:00 +0000597 /* Fill in the digit parts */
598 n_leading_chars = spec.n_lpadding + spec.n_lsign + spec.n_spadding;
599 memmove(p + n_leading_chars,
600 pnumeric_chars,
601 n_digits * sizeof(STRINGLIB_CHAR));
602
Eric Smith5807c412008-05-11 21:00:57 +0000603 /* If type is 'X', convert to uppercase */
Eric Smith8fd3eba2008-02-17 19:48:00 +0000604 if (format->type == 'X') {
605 Py_ssize_t t;
Christian Heimesc3f30c42008-02-22 16:37:40 +0000606 for (t = 0; t < n_digits; ++t)
Eric Smith8fd3eba2008-02-17 19:48:00 +0000607 p[t + n_leading_chars] = STRINGLIB_TOUPPER(p[t + n_leading_chars]);
Eric Smith8c663262007-08-25 02:26:07 +0000608 }
609
Eric Smith5807c412008-05-11 21:00:57 +0000610 /* Insert the grouping, if any, after the uppercasing of 'X', so we can
Eric Smith6ed16dc2008-06-24 06:07:03 +0000611 ensure that grouping chars won't be affected. */
Eric Smithb151a452008-06-24 11:21:04 +0000612 if (n_grouping_chars) {
Eric Smith5807c412008-05-11 21:00:57 +0000613 /* We know this can't fail, since we've already
614 reserved enough space. */
615 STRINGLIB_CHAR *pstart = p + n_leading_chars;
Eric Smith6d7e7a72008-06-24 01:06:47 +0000616 int r = STRINGLIB_GROUPING(pstart, n_digits, n_digits,
Eric Smith5807c412008-05-11 21:00:57 +0000617 spec.n_total+n_grouping_chars-n_leading_chars,
618 NULL, 0);
619 assert(r);
620 }
621
Eric Smithb151a452008-06-24 11:21:04 +0000622 /* Fill in the non-digit parts (padding, sign, etc.) */
623 fill_non_digits(p, &spec, n_digits + n_grouping_chars,
624 format->fill_char == '\0' ? ' ' : format->fill_char);
Eric Smith8c663262007-08-25 02:26:07 +0000625
Eric Smith8c663262007-08-25 02:26:07 +0000626done:
Eric Smith8fd3eba2008-02-17 19:48:00 +0000627 Py_XDECREF(tmp);
Eric Smith8c663262007-08-25 02:26:07 +0000628 return result;
629}
Eric Smith8fd3eba2008-02-17 19:48:00 +0000630#endif /* defined FORMAT_LONG || defined FORMAT_INT */
Eric Smith8c663262007-08-25 02:26:07 +0000631
/************************************************************************/
/*********** float formatting *******************************************/
/************************************************************************/
635
Eric Smith8fd3eba2008-02-17 19:48:00 +0000636#ifdef FORMAT_FLOAT
637#if STRINGLIB_IS_UNICODE
Eric Smith8c663262007-08-25 02:26:07 +0000638/* taken from unicodeobject.c */
639static Py_ssize_t
640strtounicode(Py_UNICODE *buffer, const char *charbuffer)
641{
642 register Py_ssize_t i;
643 Py_ssize_t len = strlen(charbuffer);
Christian Heimesc3f30c42008-02-22 16:37:40 +0000644 for (i = len - 1; i >= 0; --i)
Eric Smith185e30c2007-08-30 22:23:08 +0000645 buffer[i] = (Py_UNICODE) charbuffer[i];
Eric Smith8c663262007-08-25 02:26:07 +0000646
647 return len;
648}
Eric Smith8fd3eba2008-02-17 19:48:00 +0000649#endif
Eric Smith8c663262007-08-25 02:26:07 +0000650
/* size, in characters, of the scratch buffers used to format a float.
   see FORMATBUFLEN in unicodeobject.c */
#define FLOAT_FORMATBUFLEN 120
653
654/* much of this is taken from unicodeobject.c */
Eric Smith8c663262007-08-25 02:26:07 +0000655static PyObject *
Christian Heimesc3f30c42008-02-22 16:37:40 +0000656format_float_internal(PyObject *value,
657 const InternalFormatSpec *format)
Eric Smith8c663262007-08-25 02:26:07 +0000658{
659 /* fmt = '%.' + `prec` + `type` + '%%'
660 worst case length = 2 + 10 (len of INT_MAX) + 1 + 2 = 15 (use 20)*/
661 char fmt[20];
662
663 /* taken from unicodeobject.c */
664 /* Worst case length calc to ensure no buffer overrun:
665
666 'g' formats:
Eric Smith185e30c2007-08-30 22:23:08 +0000667 fmt = %#.<prec>g
668 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
669 for any double rep.)
670 len = 1 + prec + 1 + 2 + 5 = 9 + prec
Eric Smith8c663262007-08-25 02:26:07 +0000671
672 'f' formats:
Eric Smith185e30c2007-08-30 22:23:08 +0000673 buf = '-' + [0-9]*x + '.' + [0-9]*prec (with x < 50)
674 len = 1 + 50 + 1 + prec = 52 + prec
Eric Smith8c663262007-08-25 02:26:07 +0000675
676 If prec=0 the effective precision is 1 (the leading digit is
677 always given), therefore increase the length by one.
678
679 */
680 char charbuf[FLOAT_FORMATBUFLEN];
681 Py_ssize_t n_digits;
682 double x;
683 Py_ssize_t precision = format->precision;
684 PyObject *result = NULL;
685 STRINGLIB_CHAR sign;
686 char* trailing = "";
687 STRINGLIB_CHAR *p;
688 NumberFieldWidths spec;
Christian Heimesc3f30c42008-02-22 16:37:40 +0000689 STRINGLIB_CHAR type = format->type;
Eric Smith8c663262007-08-25 02:26:07 +0000690
691#if STRINGLIB_IS_UNICODE
692 Py_UNICODE unicodebuf[FLOAT_FORMATBUFLEN];
693#endif
694
Eric Smithb1ebcc62008-07-15 13:02:41 +0000695 /* alternate is not allowed on floats. */
696 if (format->alternate) {
697 PyErr_SetString(PyExc_ValueError,
698 "Alternate form (#) not allowed in float format "
699 "specifier");
700 goto done;
701 }
702
Eric Smith8c663262007-08-25 02:26:07 +0000703 /* first, do the conversion as 8-bit chars, using the platform's
704 snprintf. then, if needed, convert to unicode. */
705
706 /* 'F' is the same as 'f', per the PEP */
707 if (type == 'F')
708 type = 'f';
709
710 x = PyFloat_AsDouble(value);
711
712 if (x == -1.0 && PyErr_Occurred())
Eric Smith185e30c2007-08-30 22:23:08 +0000713 goto done;
Eric Smith8c663262007-08-25 02:26:07 +0000714
715 if (type == '%') {
716 type = 'f';
717 x *= 100;
718 trailing = "%";
719 }
720
721 if (precision < 0)
Eric Smith185e30c2007-08-30 22:23:08 +0000722 precision = 6;
Eric Smith8c663262007-08-25 02:26:07 +0000723 if (type == 'f' && (fabs(x) / 1e25) >= 1e25)
Eric Smith185e30c2007-08-30 22:23:08 +0000724 type = 'g';
Eric Smith8c663262007-08-25 02:26:07 +0000725
726 /* cast "type", because if we're in unicode we need to pass a
727 8-bit char. this is safe, because we've restricted what "type"
728 can be */
Eric Smith8fd3eba2008-02-17 19:48:00 +0000729 PyOS_snprintf(fmt, sizeof(fmt), "%%.%" PY_FORMAT_SIZE_T "d%c", precision,
730 (char)type);
Eric Smith8c663262007-08-25 02:26:07 +0000731
Christian Heimesc3f30c42008-02-22 16:37:40 +0000732 /* do the actual formatting */
733 PyOS_ascii_formatd(charbuf, sizeof(charbuf), fmt, x);
Eric Smith8c663262007-08-25 02:26:07 +0000734
735 /* adding trailing to fmt with PyOS_snprintf doesn't work, not
736 sure why. we'll just concatentate it here, no harm done. we
737 know we can't have a buffer overflow from the fmt size
738 analysis */
739 strcat(charbuf, trailing);
740
741 /* rather than duplicate the code for snprintf for both unicode
742 and 8 bit strings, we just use the 8 bit version and then
743 convert to unicode in a separate code path. that's probably
744 the lesser of 2 evils. */
745#if STRINGLIB_IS_UNICODE
746 n_digits = strtounicode(unicodebuf, charbuf);
747 p = unicodebuf;
748#else
749 /* compute the length. I believe this is done because the return
750 value from snprintf above is unreliable */
751 n_digits = strlen(charbuf);
752 p = charbuf;
753#endif
754
755 /* is a sign character present in the output? if so, remember it
756 and skip it */
757 sign = p[0];
758 if (sign == '-') {
Christian Heimesc3f30c42008-02-22 16:37:40 +0000759 ++p;
760 --n_digits;
Eric Smith8c663262007-08-25 02:26:07 +0000761 }
762
Eric Smithb1ebcc62008-07-15 13:02:41 +0000763 calc_number_widths(&spec, sign, 0, n_digits, format);
Eric Smith8c663262007-08-25 02:26:07 +0000764
765 /* allocate a string with enough space */
766 result = STRINGLIB_NEW(NULL, spec.n_total);
767 if (result == NULL)
768 goto done;
769
Eric Smithb151a452008-06-24 11:21:04 +0000770 /* Fill in the non-digit parts (padding, sign, etc.) */
771 fill_non_digits(STRINGLIB_STR(result), &spec, n_digits,
772 format->fill_char == '\0' ? ' ' : format->fill_char);
Eric Smith8c663262007-08-25 02:26:07 +0000773
774 /* fill in the digit parts */
Eric Smith8fd3eba2008-02-17 19:48:00 +0000775 memmove(STRINGLIB_STR(result) +
776 (spec.n_lpadding + spec.n_lsign + spec.n_spadding),
Eric Smith8c663262007-08-25 02:26:07 +0000777 p,
778 n_digits * sizeof(STRINGLIB_CHAR));
779
780done:
781 return result;
782}
Eric Smith8fd3eba2008-02-17 19:48:00 +0000783#endif /* FORMAT_FLOAT */
Eric Smith8c663262007-08-25 02:26:07 +0000784
/************************************************************************/
/*********** built in formatters ****************************************/
/************************************************************************/
Eric Smith8c663262007-08-25 02:26:07 +0000788PyObject *
Eric Smith4a7d76d2008-05-30 18:10:19 +0000789FORMAT_STRING(PyObject *obj,
790 STRINGLIB_CHAR *format_spec,
791 Py_ssize_t format_spec_len)
Eric Smith8c663262007-08-25 02:26:07 +0000792{
Eric Smith8c663262007-08-25 02:26:07 +0000793 InternalFormatSpec format;
Eric Smith4a7d76d2008-05-30 18:10:19 +0000794 PyObject *result = NULL;
Eric Smith8c663262007-08-25 02:26:07 +0000795
796 /* check for the special case of zero length format spec, make
Eric Smith4a7d76d2008-05-30 18:10:19 +0000797 it equivalent to str(obj) */
798 if (format_spec_len == 0) {
799 result = STRINGLIB_TOSTR(obj);
Eric Smith8c663262007-08-25 02:26:07 +0000800 goto done;
801 }
802
803 /* parse the format_spec */
Eric Smith4a7d76d2008-05-30 18:10:19 +0000804 if (!parse_internal_render_format_spec(format_spec, format_spec_len,
805 &format, 's'))
Eric Smith8c663262007-08-25 02:26:07 +0000806 goto done;
807
808 /* type conversion? */
809 switch (format.type) {
810 case 's':
811 /* no type conversion needed, already a string. do the formatting */
Eric Smith4a7d76d2008-05-30 18:10:19 +0000812 result = format_string_internal(obj, &format);
Eric Smith8c663262007-08-25 02:26:07 +0000813 break;
Eric Smith8c663262007-08-25 02:26:07 +0000814 default:
815 /* unknown */
Martin v. Löwis5a6f4582008-04-07 03:22:07 +0000816 #if STRINGLIB_IS_UNICODE
817 /* If STRINGLIB_CHAR is Py_UNICODE, %c might be out-of-range,
818 hence the two cases. If it is char, gcc complains that the
819 condition below is always true, hence the ifdef. */
820 if (format.type > 32 && format.type <128)
821 #endif
822 PyErr_Format(PyExc_ValueError, "Unknown conversion type %c",
823 (char)format.type);
824 #if STRINGLIB_IS_UNICODE
825 else
826 PyErr_Format(PyExc_ValueError, "Unknown conversion type '\\x%x'",
827 (unsigned int)format.type);
828 #endif
Eric Smith8c663262007-08-25 02:26:07 +0000829 goto done;
830 }
831
832done:
Eric Smith8c663262007-08-25 02:26:07 +0000833 return result;
834}
835
#if defined FORMAT_LONG || defined FORMAT_INT
/* Shared implementation behind FORMAT_LONG and FORMAT_INT.

   obj             -- the object to format
   format_spec     -- the format specifier characters
   format_spec_len -- number of characters in format_spec
   tostring        -- callback handed through to
                      format_int_or_long_internal for the integer
                      presentation types

   Returns the formatted result, or NULL on failure.  An empty format_spec
   is treated as str(obj).  The float presentation types first convert obj
   with PyNumber_Float and format the converted value.  An unrecognised
   presentation type sets ValueError. */
static PyObject*
format_int_or_long(PyObject* obj,
                   STRINGLIB_CHAR *format_spec,
                   Py_ssize_t format_spec_len,
                   IntOrLongToString tostring)
{
    PyObject *result = NULL;
    PyObject *tmp = NULL;
    InternalFormatSpec format;

    /* check for the special case of zero length format spec, make
       it equivalent to str(obj) */
    if (format_spec_len == 0) {
        result = STRINGLIB_TOSTR(obj);
        goto done;
    }

    /* parse the format_spec */
    if (!parse_internal_render_format_spec(format_spec,
                                           format_spec_len,
                                           &format, 'd'))
        goto done;

    /* type conversion? */
    switch (format.type) {
    case 'b':
    case 'c':
    case 'd':
    case 'o':
    case 'x':
    case 'X':
    case 'n':
        /* no type conversion needed, already an int (or long).  do
           the formatting */
        result = format_int_or_long_internal(obj, &format, tostring);
        break;

    case 'e':
    case 'E':
    case 'f':
    case 'F':
    case 'g':
    case 'G':
    case '%':
        /* convert to float */
        tmp = PyNumber_Float(obj);
        if (tmp == NULL)
            goto done;
        /* Format the converted float rather than the original integer
           object; tmp is released at 'done'. */
        result = format_float_internal(tmp, &format);
        break;

    default:
        /* unknown */
#if STRINGLIB_IS_UNICODE
        /* If STRINGLIB_CHAR is Py_UNICODE, %c might be out-of-range,
           hence the two cases.  If it is char, gcc complains that the
           condition below is always true, hence the ifdef. */
        if (format.type > 32 && format.type < 128)
#endif
            PyErr_Format(PyExc_ValueError, "Unknown conversion type %c",
                         (char)format.type);
#if STRINGLIB_IS_UNICODE
        else
            PyErr_Format(PyExc_ValueError,
                         "Unknown conversion type '\\x%x'",
                         (unsigned int)format.type);
#endif
        goto done;
    }

done:
    Py_XDECREF(tmp);
    return result;
}
#endif /* FORMAT_LONG || defined FORMAT_INT */
Eric Smith8c663262007-08-25 02:26:07 +0000900
#ifdef FORMAT_LONG
/* long_format is the function used to convert a long to a string in a
   given base.  In 3.0, _PyLong_Format already has the correct signature
   and is used directly.  In 2.x, a small adapter supplies the extra
   parameters. */
#if PY_VERSION_HEX >= 0x03000000
#define long_format _PyLong_Format
#else
static PyObject*
long_format(PyObject* value, int base)
{
    /* Extra _PyLong_Format arguments: don't add the trailing 'L', and
       use the new octal format. */
    const int add_trailing_l = 0;
    const int new_octal_format = 1;

    /* We already know this is a long object. */
    assert(PyLong_Check(value));
    return _PyLong_Format(value, base, add_trailing_l, new_octal_format);
}
#endif

/* Public long formatter: delegates to format_int_or_long, passing
   long_format as the to-string callback. */
PyObject *
FORMAT_LONG(PyObject *obj,
            STRINGLIB_CHAR *format_spec,
            Py_ssize_t format_spec_len)
{
    PyObject *formatted;

    formatted = format_int_or_long(obj, format_spec, format_spec_len,
                                   long_format);
    return formatted;
}
#endif /* FORMAT_LONG */
928
#ifdef FORMAT_INT
/* this is only used for 2.x, not 3.0 */

/* Convert an int object to a string in the given base; this is the
   to-string callback FORMAT_INT passes to format_int_or_long. */
static PyObject*
int_format(PyObject* value, int base)
{
    /* Extra _PyInt_Format argument: use the new octal format. */
    const int new_octal_format = 1;

    /* We already know this is an int object. */
    assert(PyInt_Check(value));
    return _PyInt_Format((PyIntObject*)value, base, new_octal_format);
}

/* Public int formatter: delegates to format_int_or_long, passing
   int_format as the to-string callback. */
PyObject *
FORMAT_INT(PyObject *obj,
           STRINGLIB_CHAR *format_spec,
           Py_ssize_t format_spec_len)
{
    PyObject *formatted;

    formatted = format_int_or_long(obj, format_spec, format_spec_len,
                                   int_format);
    return formatted;
}
#endif /* FORMAT_INT */
949
#ifdef FORMAT_FLOAT
/* Public float formatter.

   obj             -- the object to format
   format_spec     -- the format specifier characters
   format_spec_len -- number of characters in format_spec

   Returns the formatted result, or NULL on failure.  An empty format_spec
   is treated as str(obj).  A spec whose parsed type is '\0' is formatted
   with type 'Z'.  An unrecognised presentation type sets ValueError. */
PyObject *
FORMAT_FLOAT(PyObject *obj,
             STRINGLIB_CHAR *format_spec,
             Py_ssize_t format_spec_len)
{
    PyObject *result = NULL;
    InternalFormatSpec format;

    /* check for the special case of zero length format spec, make
       it equivalent to str(obj) */
    if (format_spec_len == 0) {
        result = STRINGLIB_TOSTR(obj);
        goto done;
    }

    /* parse the format_spec */
    if (!parse_internal_render_format_spec(format_spec,
                                           format_spec_len,
                                           &format, '\0'))
        goto done;

    /* type conversion? */
    switch (format.type) {
    case '\0':
        /* 'Z' means like 'g', but with at least one decimal.  See
           PyOS_ascii_formatd */
        format.type = 'Z';
        /* Deliberate fall through to the next case statement */
    case 'e':
    case 'E':
    case 'f':
    case 'F':
    case 'g':
    case 'G':
    case 'n':
    case '%':
        /* no conversion, already a float.  do the formatting */
        result = format_float_internal(obj, &format);
        break;

    default:
        /* unknown */
#if STRINGLIB_IS_UNICODE
        /* If STRINGLIB_CHAR is Py_UNICODE, %c might be out-of-range,
           hence the two cases.  If it is char, gcc complains that the
           condition below is always true, hence the ifdef. */
        if (format.type > 32 && format.type < 128)
#endif
            PyErr_Format(PyExc_ValueError, "Unknown conversion type %c",
                         (char)format.type);
#if STRINGLIB_IS_UNICODE
        else
            PyErr_Format(PyExc_ValueError,
                         "Unknown conversion type '\\x%x'",
                         (unsigned int)format.type);
#endif
        goto done;
    }

done:
    return result;
}
#endif /* FORMAT_FLOAT */