blob: ba43200ec9eb06beee5d4baa8543c127682a96d8 [file] [log] [blame]
/* Implements the string, long, and float formatters; that is,
   string.__format__, etc. */

/* Before including this, you must include either:
   stringlib/unicodedefs.h
   stringlib/stringdefs.h

   Also, you should define the names:
   FORMAT_STRING
   FORMAT_LONG
   FORMAT_FLOAT
   to be whatever you want the public names of these functions to
   be.  These are the only non-static functions defined here.
*/
15
16#define ALLOW_PARENS_FOR_SIGN 0
17
18/*
19 get_integer consumes 0 or more decimal digit characters from an
20 input string, updates *result with the corresponding positive
21 integer, and returns the number of digits consumed.
22
23 returns -1 on error.
24*/
25static int
26get_integer(STRINGLIB_CHAR **ptr, STRINGLIB_CHAR *end,
27 Py_ssize_t *result)
28{
29 Py_ssize_t accumulator, digitval, oldaccumulator;
30 int numdigits;
31 accumulator = numdigits = 0;
32 for (;;(*ptr)++, numdigits++) {
33 if (*ptr >= end)
34 break;
35 digitval = STRINGLIB_TODECIMAL(**ptr);
36 if (digitval < 0)
37 break;
38 /*
39 This trick was copied from old Unicode format code. It's cute,
40 but would really suck on an old machine with a slow divide
41 implementation. Fortunately, in the normal case we do not
42 expect too many digits.
43 */
44 oldaccumulator = accumulator;
45 accumulator *= 10;
46 if ((accumulator+10)/10 != oldaccumulator+1) {
47 PyErr_Format(PyExc_ValueError,
48 "Too many decimal digits in format string");
49 return -1;
50 }
51 accumulator += digitval;
52 }
53 *result = accumulator;
54 return numdigits;
55}
56
57/************************************************************************/
58/*********** standard format specifier parsing **************************/
59/************************************************************************/
60
61/* returns true if this character is a specifier alignment token */
62Py_LOCAL_INLINE(int)
63is_alignment_token(STRINGLIB_CHAR c)
64{
65 switch (c) {
66 case '<': case '>': case '=': case '^':
67 return 1;
68 default:
69 return 0;
70 }
71}
72
73/* returns true if this character is a sign element */
74Py_LOCAL_INLINE(int)
75is_sign_element(STRINGLIB_CHAR c)
76{
77 switch (c) {
78 case ' ': case '+': case '-':
79#if ALLOW_PARENS_FOR_SIGN
80 case '(':
81#endif
82 return 1;
83 default:
84 return 0;
85 }
86}
87
88
89typedef struct {
90 STRINGLIB_CHAR fill_char;
91 STRINGLIB_CHAR align;
Eric Smithd0c84122008-07-15 10:10:07 +000092 int alternate;
Eric Smitha9f7d622008-02-17 19:46:49 +000093 STRINGLIB_CHAR sign;
94 Py_ssize_t width;
95 Py_ssize_t precision;
96 STRINGLIB_CHAR type;
97} InternalFormatSpec;
98
99/*
100 ptr points to the start of the format_spec, end points just past its end.
101 fills in format with the parsed information.
102 returns 1 on success, 0 on failure.
103 if failure, sets the exception
104*/
105static int
Eric Smithdc13b792008-05-30 18:10:04 +0000106parse_internal_render_format_spec(STRINGLIB_CHAR *format_spec,
107 Py_ssize_t format_spec_len,
Eric Smitha9f7d622008-02-17 19:46:49 +0000108 InternalFormatSpec *format,
109 char default_type)
110{
Eric Smithdc13b792008-05-30 18:10:04 +0000111 STRINGLIB_CHAR *ptr = format_spec;
112 STRINGLIB_CHAR *end = format_spec + format_spec_len;
Eric Smitha9f7d622008-02-17 19:46:49 +0000113
114 /* end-ptr is used throughout this code to specify the length of
115 the input string */
116
117 Py_ssize_t specified_width;
118
119 format->fill_char = '\0';
120 format->align = '\0';
Eric Smithd0c84122008-07-15 10:10:07 +0000121 format->alternate = 0;
Eric Smitha9f7d622008-02-17 19:46:49 +0000122 format->sign = '\0';
123 format->width = -1;
124 format->precision = -1;
125 format->type = default_type;
126
127 /* If the second char is an alignment token,
128 then parse the fill char */
129 if (end-ptr >= 2 && is_alignment_token(ptr[1])) {
130 format->align = ptr[1];
131 format->fill_char = ptr[0];
132 ptr += 2;
133 }
134 else if (end-ptr >= 1 && is_alignment_token(ptr[0])) {
135 format->align = ptr[0];
Eric Smith8a803dd2008-02-20 23:39:28 +0000136 ++ptr;
Eric Smitha9f7d622008-02-17 19:46:49 +0000137 }
138
139 /* Parse the various sign options */
140 if (end-ptr >= 1 && is_sign_element(ptr[0])) {
141 format->sign = ptr[0];
Eric Smith8a803dd2008-02-20 23:39:28 +0000142 ++ptr;
Eric Smitha9f7d622008-02-17 19:46:49 +0000143#if ALLOW_PARENS_FOR_SIGN
144 if (end-ptr >= 1 && ptr[0] == ')') {
Eric Smith8a803dd2008-02-20 23:39:28 +0000145 ++ptr;
Eric Smitha9f7d622008-02-17 19:46:49 +0000146 }
147#endif
148 }
149
Eric Smitha5fa5a22008-07-16 00:11:49 +0000150 /* If the next character is #, we're in alternate mode. This only
151 applies to integers. */
152 if (end-ptr >= 1 && ptr[0] == '#') {
153 format->alternate = 1;
154 ++ptr;
155 }
156
Eric Smitha9f7d622008-02-17 19:46:49 +0000157 /* The special case for 0-padding (backwards compat) */
158 if (format->fill_char == '\0' && end-ptr >= 1 && ptr[0] == '0') {
159 format->fill_char = '0';
160 if (format->align == '\0') {
161 format->align = '=';
162 }
Eric Smith8a803dd2008-02-20 23:39:28 +0000163 ++ptr;
Eric Smitha9f7d622008-02-17 19:46:49 +0000164 }
165
166 /* XXX add error checking */
167 specified_width = get_integer(&ptr, end, &format->width);
168
169 /* if specified_width is 0, we didn't consume any characters for
170 the width. in that case, reset the width to -1, because
171 get_integer() will have set it to zero */
172 if (specified_width == 0) {
173 format->width = -1;
174 }
175
176 /* Parse field precision */
177 if (end-ptr && ptr[0] == '.') {
Eric Smith8a803dd2008-02-20 23:39:28 +0000178 ++ptr;
Eric Smitha9f7d622008-02-17 19:46:49 +0000179
180 /* XXX add error checking */
181 specified_width = get_integer(&ptr, end, &format->precision);
182
183 /* not having a precision after a dot is an error */
184 if (specified_width == 0) {
185 PyErr_Format(PyExc_ValueError,
186 "Format specifier missing precision");
187 return 0;
188 }
189
190 }
191
192 /* Finally, parse the type field */
193
194 if (end-ptr > 1) {
195 /* invalid conversion spec */
196 PyErr_Format(PyExc_ValueError, "Invalid conversion specification");
197 return 0;
198 }
199
200 if (end-ptr == 1) {
201 format->type = ptr[0];
Eric Smith8a803dd2008-02-20 23:39:28 +0000202 ++ptr;
Eric Smitha9f7d622008-02-17 19:46:49 +0000203 }
204
205 return 1;
206}
207
208#if defined FORMAT_FLOAT || defined FORMAT_LONG
209/************************************************************************/
210/*********** common routines for numeric formatting *********************/
211/************************************************************************/
212
213/* describes the layout for an integer, see the comment in
Eric Smitha5fa5a22008-07-16 00:11:49 +0000214 calc_number_widths() for details */
Eric Smitha9f7d622008-02-17 19:46:49 +0000215typedef struct {
216 Py_ssize_t n_lpadding;
Eric Smitha5fa5a22008-07-16 00:11:49 +0000217 Py_ssize_t n_prefix;
Eric Smitha9f7d622008-02-17 19:46:49 +0000218 Py_ssize_t n_spadding;
219 Py_ssize_t n_rpadding;
220 char lsign;
221 Py_ssize_t n_lsign;
222 char rsign;
223 Py_ssize_t n_rsign;
224 Py_ssize_t n_total; /* just a convenience, it's derivable from the
225 other fields */
226} NumberFieldWidths;
227
228/* not all fields of format are used. for example, precision is
229 unused. should this take discrete params in order to be more clear
230 about what it does? or is passing a single format parameter easier
231 and more efficient enough to justify a little obfuscation? */
232static void
233calc_number_widths(NumberFieldWidths *r, STRINGLIB_CHAR actual_sign,
Eric Smithd0c84122008-07-15 10:10:07 +0000234 Py_ssize_t n_prefix, Py_ssize_t n_digits,
235 const InternalFormatSpec *format)
Eric Smitha9f7d622008-02-17 19:46:49 +0000236{
237 r->n_lpadding = 0;
Eric Smitha5fa5a22008-07-16 00:11:49 +0000238 r->n_prefix = 0;
Eric Smitha9f7d622008-02-17 19:46:49 +0000239 r->n_spadding = 0;
240 r->n_rpadding = 0;
241 r->lsign = '\0';
242 r->n_lsign = 0;
243 r->rsign = '\0';
244 r->n_rsign = 0;
245
246 /* the output will look like:
Eric Smithd0c84122008-07-15 10:10:07 +0000247 | |
248 | <lpadding> <lsign> <prefix> <spadding> <digits> <rsign> <rpadding> |
249 | |
Eric Smitha9f7d622008-02-17 19:46:49 +0000250
251 lsign and rsign are computed from format->sign and the actual
252 sign of the number
253
Eric Smithd0c84122008-07-15 10:10:07 +0000254 prefix is given (it's for the '0x' prefix)
255
Eric Smitha9f7d622008-02-17 19:46:49 +0000256 digits is already known
257
258 the total width is either given, or computed from the
259 actual digits
260
261 only one of lpadding, spadding, and rpadding can be non-zero,
262 and it's calculated from the width and other fields
263 */
264
265 /* compute the various parts we're going to write */
266 if (format->sign == '+') {
267 /* always put a + or - */
268 r->n_lsign = 1;
269 r->lsign = (actual_sign == '-' ? '-' : '+');
270 }
271#if ALLOW_PARENS_FOR_SIGN
272 else if (format->sign == '(') {
273 if (actual_sign == '-') {
274 r->n_lsign = 1;
275 r->lsign = '(';
276 r->n_rsign = 1;
277 r->rsign = ')';
278 }
279 }
280#endif
281 else if (format->sign == ' ') {
282 r->n_lsign = 1;
283 r->lsign = (actual_sign == '-' ? '-' : ' ');
284 }
285 else {
286 /* non specified, or the default (-) */
287 if (actual_sign == '-') {
288 r->n_lsign = 1;
289 r->lsign = '-';
290 }
291 }
292
Eric Smitha5fa5a22008-07-16 00:11:49 +0000293 r->n_prefix = n_prefix;
294
Eric Smitha9f7d622008-02-17 19:46:49 +0000295 /* now the number of padding characters */
296 if (format->width == -1) {
297 /* no padding at all, nothing to do */
298 }
299 else {
300 /* see if any padding is needed */
Eric Smitha5fa5a22008-07-16 00:11:49 +0000301 if (r->n_lsign + n_digits + r->n_rsign +
302 r->n_prefix >= format->width) {
Eric Smitha9f7d622008-02-17 19:46:49 +0000303 /* no padding needed, we're already bigger than the
304 requested width */
305 }
306 else {
307 /* determine which of left, space, or right padding is
308 needed */
309 Py_ssize_t padding = format->width -
Eric Smitha5fa5a22008-07-16 00:11:49 +0000310 (r->n_lsign + r->n_prefix +
311 n_digits + r->n_rsign);
Eric Smitha9f7d622008-02-17 19:46:49 +0000312 if (format->align == '<')
313 r->n_rpadding = padding;
314 else if (format->align == '>')
315 r->n_lpadding = padding;
316 else if (format->align == '^') {
317 r->n_lpadding = padding / 2;
318 r->n_rpadding = padding - r->n_lpadding;
319 }
320 else if (format->align == '=')
321 r->n_spadding = padding;
322 else
323 r->n_lpadding = padding;
324 }
325 }
Eric Smitha5fa5a22008-07-16 00:11:49 +0000326 r->n_total = r->n_lpadding + r->n_lsign + r->n_prefix +
327 r->n_spadding + n_digits + r->n_rsign + r->n_rpadding;
Eric Smitha9f7d622008-02-17 19:46:49 +0000328}
329
330/* fill in the non-digit parts of a numbers's string representation,
Eric Smitha5fa5a22008-07-16 00:11:49 +0000331 as determined in calc_number_widths(). returns the pointer to
Eric Smitha9f7d622008-02-17 19:46:49 +0000332 where the digits go. */
333static STRINGLIB_CHAR *
Eric Smith5dce7e92008-06-24 11:11:59 +0000334fill_non_digits(STRINGLIB_CHAR *p_buf, const NumberFieldWidths *spec,
Eric Smitha5fa5a22008-07-16 00:11:49 +0000335 STRINGLIB_CHAR *prefix, Py_ssize_t n_digits,
336 STRINGLIB_CHAR fill_char)
Eric Smitha9f7d622008-02-17 19:46:49 +0000337{
Eric Smitha5fa5a22008-07-16 00:11:49 +0000338 STRINGLIB_CHAR *p_digits;
Eric Smitha9f7d622008-02-17 19:46:49 +0000339
340 if (spec->n_lpadding) {
341 STRINGLIB_FILL(p_buf, fill_char, spec->n_lpadding);
342 p_buf += spec->n_lpadding;
343 }
344 if (spec->n_lsign == 1) {
345 *p_buf++ = spec->lsign;
346 }
Eric Smitha5fa5a22008-07-16 00:11:49 +0000347 if (spec->n_prefix) {
348 memmove(p_buf,
349 prefix,
350 spec->n_prefix * sizeof(STRINGLIB_CHAR));
351 p_buf += spec->n_prefix;
352 }
Eric Smitha9f7d622008-02-17 19:46:49 +0000353 if (spec->n_spadding) {
354 STRINGLIB_FILL(p_buf, fill_char, spec->n_spadding);
355 p_buf += spec->n_spadding;
356 }
357 p_digits = p_buf;
358 p_buf += n_digits;
359 if (spec->n_rsign == 1) {
360 *p_buf++ = spec->rsign;
361 }
362 if (spec->n_rpadding) {
363 STRINGLIB_FILL(p_buf, fill_char, spec->n_rpadding);
364 p_buf += spec->n_rpadding;
365 }
366 return p_digits;
367}
368#endif /* FORMAT_FLOAT || FORMAT_LONG */
369
370/************************************************************************/
371/*********** string formatting ******************************************/
372/************************************************************************/
373
374static PyObject *
375format_string_internal(PyObject *value, const InternalFormatSpec *format)
376{
377 Py_ssize_t width; /* total field width */
378 Py_ssize_t lpad;
379 STRINGLIB_CHAR *dst;
380 STRINGLIB_CHAR *src = STRINGLIB_STR(value);
381 Py_ssize_t len = STRINGLIB_LEN(value);
382 PyObject *result = NULL;
383
384 /* sign is not allowed on strings */
385 if (format->sign != '\0') {
386 PyErr_SetString(PyExc_ValueError,
387 "Sign not allowed in string format specifier");
388 goto done;
389 }
390
Eric Smithd0c84122008-07-15 10:10:07 +0000391 /* alternate is not allowed on strings */
392 if (format->alternate) {
393 PyErr_SetString(PyExc_ValueError,
394 "Alternate form (#) not allowed in string format "
395 "specifier");
396 goto done;
397 }
398
Eric Smitha9f7d622008-02-17 19:46:49 +0000399 /* '=' alignment not allowed on strings */
400 if (format->align == '=') {
401 PyErr_SetString(PyExc_ValueError,
402 "'=' alignment not allowed "
403 "in string format specifier");
404 goto done;
405 }
406
407 /* if precision is specified, output no more that format.precision
408 characters */
409 if (format->precision >= 0 && len >= format->precision) {
410 len = format->precision;
411 }
412
413 if (format->width >= 0) {
414 width = format->width;
415
416 /* but use at least len characters */
417 if (len > width) {
418 width = len;
419 }
420 }
421 else {
422 /* not specified, use all of the chars and no more */
423 width = len;
424 }
425
426 /* allocate the resulting string */
427 result = STRINGLIB_NEW(NULL, width);
428 if (result == NULL)
429 goto done;
430
431 /* now write into that space */
432 dst = STRINGLIB_STR(result);
433
434 /* figure out how much leading space we need, based on the
435 aligning */
436 if (format->align == '>')
437 lpad = width - len;
438 else if (format->align == '^')
439 lpad = (width - len) / 2;
440 else
441 lpad = 0;
442
443 /* if right aligning, increment the destination allow space on the
444 left */
445 memcpy(dst + lpad, src, len * sizeof(STRINGLIB_CHAR));
446
447 /* do any padding */
448 if (width > len) {
449 STRINGLIB_CHAR fill_char = format->fill_char;
450 if (fill_char == '\0') {
451 /* use the default, if not specified */
452 fill_char = ' ';
453 }
454
455 /* pad on left */
456 if (lpad)
457 STRINGLIB_FILL(dst, fill_char, lpad);
458
459 /* pad on right */
460 if (width - len - lpad)
461 STRINGLIB_FILL(dst + len + lpad, fill_char, width - len - lpad);
462 }
463
464done:
465 return result;
466}
467
468
469/************************************************************************/
470/*********** long formatting ********************************************/
471/************************************************************************/
472
473#if defined FORMAT_LONG || defined FORMAT_INT
474typedef PyObject*
475(*IntOrLongToString)(PyObject *value, int base);
476
477static PyObject *
478format_int_or_long_internal(PyObject *value, const InternalFormatSpec *format,
479 IntOrLongToString tostring)
480{
481 PyObject *result = NULL;
482 PyObject *tmp = NULL;
483 STRINGLIB_CHAR *pnumeric_chars;
484 STRINGLIB_CHAR numeric_char;
485 STRINGLIB_CHAR sign = '\0';
486 STRINGLIB_CHAR *p;
487 Py_ssize_t n_digits; /* count of digits need from the computed
488 string */
489 Py_ssize_t n_leading_chars;
Eric Smithcf537ff2008-05-11 19:52:48 +0000490 Py_ssize_t n_grouping_chars = 0; /* Count of additional chars to
491 allocate, used for 'n'
492 formatting. */
Eric Smitha5fa5a22008-07-16 00:11:49 +0000493 Py_ssize_t n_prefix = 0; /* Count of prefix chars, (e.g., '0x') */
494 STRINGLIB_CHAR *prefix = NULL;
Eric Smitha9f7d622008-02-17 19:46:49 +0000495 NumberFieldWidths spec;
496 long x;
497
498 /* no precision allowed on integers */
499 if (format->precision != -1) {
500 PyErr_SetString(PyExc_ValueError,
501 "Precision not allowed in integer format specifier");
502 goto done;
503 }
504
505
506 /* special case for character formatting */
507 if (format->type == 'c') {
508 /* error to specify a sign */
509 if (format->sign != '\0') {
510 PyErr_SetString(PyExc_ValueError,
511 "Sign not allowed with integer"
512 " format specifier 'c'");
513 goto done;
514 }
515
516 /* taken from unicodeobject.c formatchar() */
517 /* Integer input truncated to a character */
518/* XXX: won't work for int */
519 x = PyLong_AsLong(value);
520 if (x == -1 && PyErr_Occurred())
521 goto done;
522#ifdef Py_UNICODE_WIDE
523 if (x < 0 || x > 0x10ffff) {
524 PyErr_SetString(PyExc_OverflowError,
525 "%c arg not in range(0x110000) "
526 "(wide Python build)");
527 goto done;
528 }
529#else
530 if (x < 0 || x > 0xffff) {
531 PyErr_SetString(PyExc_OverflowError,
532 "%c arg not in range(0x10000) "
533 "(narrow Python build)");
534 goto done;
535 }
536#endif
537 numeric_char = (STRINGLIB_CHAR)x;
538 pnumeric_chars = &numeric_char;
539 n_digits = 1;
540 }
541 else {
542 int base;
Eric Smithd0c84122008-07-15 10:10:07 +0000543 int leading_chars_to_skip = 0; /* Number of characters added by
544 PyNumber_ToBase that we want to
545 skip over. */
Eric Smitha9f7d622008-02-17 19:46:49 +0000546
547 /* Compute the base and how many characters will be added by
548 PyNumber_ToBase */
549 switch (format->type) {
550 case 'b':
551 base = 2;
Eric Smitha5fa5a22008-07-16 00:11:49 +0000552 leading_chars_to_skip = 2; /* 0b */
Eric Smitha9f7d622008-02-17 19:46:49 +0000553 break;
554 case 'o':
555 base = 8;
Eric Smitha5fa5a22008-07-16 00:11:49 +0000556 leading_chars_to_skip = 2; /* 0o */
Eric Smitha9f7d622008-02-17 19:46:49 +0000557 break;
558 case 'x':
559 case 'X':
560 base = 16;
Eric Smitha5fa5a22008-07-16 00:11:49 +0000561 leading_chars_to_skip = 2; /* 0x */
Eric Smitha9f7d622008-02-17 19:46:49 +0000562 break;
563 default: /* shouldn't be needed, but stops a compiler warning */
564 case 'd':
Eric Smithcf537ff2008-05-11 19:52:48 +0000565 case 'n':
Eric Smitha9f7d622008-02-17 19:46:49 +0000566 base = 10;
Eric Smitha9f7d622008-02-17 19:46:49 +0000567 break;
568 }
569
Eric Smitha5fa5a22008-07-16 00:11:49 +0000570 /* The number of prefix chars is the same as the leading
571 chars to skip */
572 if (format->alternate)
573 n_prefix = leading_chars_to_skip;
574
Eric Smitha9f7d622008-02-17 19:46:49 +0000575 /* Do the hard part, converting to a string in a given base */
576 tmp = tostring(value, base);
577 if (tmp == NULL)
578 goto done;
579
580 pnumeric_chars = STRINGLIB_STR(tmp);
581 n_digits = STRINGLIB_LEN(tmp);
582
Eric Smitha5fa5a22008-07-16 00:11:49 +0000583 prefix = pnumeric_chars;
584
Eric Smitha9f7d622008-02-17 19:46:49 +0000585 /* Remember not to modify what pnumeric_chars points to. it
586 might be interned. Only modify it after we copy it into a
587 newly allocated output buffer. */
588
589 /* Is a sign character present in the output? If so, remember it
590 and skip it */
591 sign = pnumeric_chars[0];
592 if (sign == '-') {
Eric Smitha5fa5a22008-07-16 00:11:49 +0000593 ++prefix;
Eric Smitha9f7d622008-02-17 19:46:49 +0000594 ++leading_chars_to_skip;
595 }
596
597 /* Skip over the leading chars (0x, 0b, etc.) */
598 n_digits -= leading_chars_to_skip;
599 pnumeric_chars += leading_chars_to_skip;
600 }
601
Eric Smithcf537ff2008-05-11 19:52:48 +0000602 if (format->type == 'n')
603 /* Compute how many additional chars we need to allocate
604 to hold the thousands grouping. */
Eric Smith65fe47b2008-06-24 00:42:10 +0000605 STRINGLIB_GROUPING(NULL, n_digits, n_digits,
Eric Smithcf537ff2008-05-11 19:52:48 +0000606 0, &n_grouping_chars, 0);
607
Eric Smith5dce7e92008-06-24 11:11:59 +0000608 /* Calculate the widths of the various leading and trailing parts */
Eric Smitha5fa5a22008-07-16 00:11:49 +0000609 calc_number_widths(&spec, sign, n_prefix, n_digits + n_grouping_chars,
610 format);
Eric Smith5dce7e92008-06-24 11:11:59 +0000611
Eric Smitha9f7d622008-02-17 19:46:49 +0000612 /* Allocate a new string to hold the result */
Eric Smith5dce7e92008-06-24 11:11:59 +0000613 result = STRINGLIB_NEW(NULL, spec.n_total);
Eric Smitha9f7d622008-02-17 19:46:49 +0000614 if (!result)
615 goto done;
616 p = STRINGLIB_STR(result);
617
Eric Smitha5fa5a22008-07-16 00:11:49 +0000618 /* XXX There is too much magic here regarding the internals of
619 spec and the location of the prefix and digits. It would be
620 better if calc_number_widths returned a number of logical
621 offsets into the buffer, and those were used. Maybe in a
622 future code cleanup. */
623
Eric Smitha9f7d622008-02-17 19:46:49 +0000624 /* Fill in the digit parts */
Eric Smitha5fa5a22008-07-16 00:11:49 +0000625 n_leading_chars = spec.n_lpadding + spec.n_lsign +
626 spec.n_prefix + spec.n_spadding;
Eric Smitha9f7d622008-02-17 19:46:49 +0000627 memmove(p + n_leading_chars,
628 pnumeric_chars,
629 n_digits * sizeof(STRINGLIB_CHAR));
630
Eric Smitha5fa5a22008-07-16 00:11:49 +0000631 /* If type is 'X', convert the filled in digits to uppercase */
Eric Smitha9f7d622008-02-17 19:46:49 +0000632 if (format->type == 'X') {
633 Py_ssize_t t;
Eric Smith8a803dd2008-02-20 23:39:28 +0000634 for (t = 0; t < n_digits; ++t)
Eric Smitha9f7d622008-02-17 19:46:49 +0000635 p[t + n_leading_chars] = STRINGLIB_TOUPPER(p[t + n_leading_chars]);
636 }
637
Eric Smitha5fa5a22008-07-16 00:11:49 +0000638 /* Insert the grouping, if any, after the uppercasing of the digits, so
639 we can ensure that grouping chars won't be affected. */
Eric Smith5dce7e92008-06-24 11:11:59 +0000640 if (n_grouping_chars) {
Eric Smithcf537ff2008-05-11 19:52:48 +0000641 /* We know this can't fail, since we've already
642 reserved enough space. */
643 STRINGLIB_CHAR *pstart = p + n_leading_chars;
Eric Smith65fe47b2008-06-24 00:42:10 +0000644 int r = STRINGLIB_GROUPING(pstart, n_digits, n_digits,
Eric Smitha5fa5a22008-07-16 00:11:49 +0000645 spec.n_total+n_grouping_chars-n_leading_chars,
646 NULL, 0);
Eric Smithcf537ff2008-05-11 19:52:48 +0000647 assert(r);
648 }
649
Eric Smith5dce7e92008-06-24 11:11:59 +0000650 /* Fill in the non-digit parts (padding, sign, etc.) */
Eric Smitha5fa5a22008-07-16 00:11:49 +0000651 fill_non_digits(p, &spec, prefix, n_digits + n_grouping_chars,
Eric Smith5dce7e92008-06-24 11:11:59 +0000652 format->fill_char == '\0' ? ' ' : format->fill_char);
Eric Smitha9f7d622008-02-17 19:46:49 +0000653
Eric Smitha5fa5a22008-07-16 00:11:49 +0000654 /* If type is 'X', uppercase the prefix. This has to be done after the
655 prefix is filled in by fill_non_digits */
656 if (format->type == 'X') {
657 Py_ssize_t t;
658 for (t = 0; t < n_prefix; ++t)
659 p[t + spec.n_lpadding + spec.n_lsign] =
660 STRINGLIB_TOUPPER(p[t + spec.n_lpadding + spec.n_lsign]);
661 }
662
663
Eric Smitha9f7d622008-02-17 19:46:49 +0000664done:
665 Py_XDECREF(tmp);
666 return result;
667}
668#endif /* defined FORMAT_LONG || defined FORMAT_INT */
669
670/************************************************************************/
671/*********** float formatting *******************************************/
672/************************************************************************/
673
674#ifdef FORMAT_FLOAT
675#if STRINGLIB_IS_UNICODE
676/* taken from unicodeobject.c */
677static Py_ssize_t
678strtounicode(Py_UNICODE *buffer, const char *charbuffer)
679{
680 register Py_ssize_t i;
681 Py_ssize_t len = strlen(charbuffer);
Eric Smith8a803dd2008-02-20 23:39:28 +0000682 for (i = len - 1; i >= 0; --i)
Eric Smitha9f7d622008-02-17 19:46:49 +0000683 buffer[i] = (Py_UNICODE) charbuffer[i];
684
685 return len;
686}
687#endif
688
Eric Smitha9f7d622008-02-17 19:46:49 +0000689/* see FORMATBUFLEN in unicodeobject.c */
690#define FLOAT_FORMATBUFLEN 120
691
692/* much of this is taken from unicodeobject.c */
Eric Smitha9f7d622008-02-17 19:46:49 +0000693static PyObject *
Eric Smith8a803dd2008-02-20 23:39:28 +0000694format_float_internal(PyObject *value,
695 const InternalFormatSpec *format)
Eric Smitha9f7d622008-02-17 19:46:49 +0000696{
697 /* fmt = '%.' + `prec` + `type` + '%%'
698 worst case length = 2 + 10 (len of INT_MAX) + 1 + 2 = 15 (use 20)*/
699 char fmt[20];
700
701 /* taken from unicodeobject.c */
702 /* Worst case length calc to ensure no buffer overrun:
703
704 'g' formats:
705 fmt = %#.<prec>g
706 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
707 for any double rep.)
708 len = 1 + prec + 1 + 2 + 5 = 9 + prec
709
710 'f' formats:
711 buf = '-' + [0-9]*x + '.' + [0-9]*prec (with x < 50)
712 len = 1 + 50 + 1 + prec = 52 + prec
713
714 If prec=0 the effective precision is 1 (the leading digit is
715 always given), therefore increase the length by one.
716
717 */
718 char charbuf[FLOAT_FORMATBUFLEN];
719 Py_ssize_t n_digits;
720 double x;
721 Py_ssize_t precision = format->precision;
722 PyObject *result = NULL;
723 STRINGLIB_CHAR sign;
724 char* trailing = "";
725 STRINGLIB_CHAR *p;
726 NumberFieldWidths spec;
Eric Smith8a803dd2008-02-20 23:39:28 +0000727 STRINGLIB_CHAR type = format->type;
Eric Smitha9f7d622008-02-17 19:46:49 +0000728
729#if STRINGLIB_IS_UNICODE
730 Py_UNICODE unicodebuf[FLOAT_FORMATBUFLEN];
731#endif
732
Eric Smithd0c84122008-07-15 10:10:07 +0000733 /* alternate is not allowed on floats. */
734 if (format->alternate) {
735 PyErr_SetString(PyExc_ValueError,
736 "Alternate form (#) not allowed in float format "
737 "specifier");
738 goto done;
739 }
740
Eric Smitha9f7d622008-02-17 19:46:49 +0000741 /* first, do the conversion as 8-bit chars, using the platform's
742 snprintf. then, if needed, convert to unicode. */
743
744 /* 'F' is the same as 'f', per the PEP */
745 if (type == 'F')
746 type = 'f';
747
748 x = PyFloat_AsDouble(value);
749
750 if (x == -1.0 && PyErr_Occurred())
751 goto done;
752
753 if (type == '%') {
754 type = 'f';
755 x *= 100;
756 trailing = "%";
757 }
758
759 if (precision < 0)
760 precision = 6;
761 if (type == 'f' && (fabs(x) / 1e25) >= 1e25)
762 type = 'g';
763
764 /* cast "type", because if we're in unicode we need to pass a
765 8-bit char. this is safe, because we've restricted what "type"
766 can be */
767 PyOS_snprintf(fmt, sizeof(fmt), "%%.%" PY_FORMAT_SIZE_T "d%c", precision,
768 (char)type);
769
Eric Smith8a803dd2008-02-20 23:39:28 +0000770 /* do the actual formatting */
771 PyOS_ascii_formatd(charbuf, sizeof(charbuf), fmt, x);
Eric Smitha9f7d622008-02-17 19:46:49 +0000772
773 /* adding trailing to fmt with PyOS_snprintf doesn't work, not
774 sure why. we'll just concatentate it here, no harm done. we
775 know we can't have a buffer overflow from the fmt size
776 analysis */
777 strcat(charbuf, trailing);
778
779 /* rather than duplicate the code for snprintf for both unicode
780 and 8 bit strings, we just use the 8 bit version and then
781 convert to unicode in a separate code path. that's probably
782 the lesser of 2 evils. */
783#if STRINGLIB_IS_UNICODE
784 n_digits = strtounicode(unicodebuf, charbuf);
785 p = unicodebuf;
786#else
787 /* compute the length. I believe this is done because the return
788 value from snprintf above is unreliable */
789 n_digits = strlen(charbuf);
790 p = charbuf;
791#endif
792
793 /* is a sign character present in the output? if so, remember it
794 and skip it */
795 sign = p[0];
796 if (sign == '-') {
Eric Smith8a803dd2008-02-20 23:39:28 +0000797 ++p;
798 --n_digits;
Eric Smitha9f7d622008-02-17 19:46:49 +0000799 }
800
Eric Smithd0c84122008-07-15 10:10:07 +0000801 calc_number_widths(&spec, sign, 0, n_digits, format);
Eric Smitha9f7d622008-02-17 19:46:49 +0000802
803 /* allocate a string with enough space */
804 result = STRINGLIB_NEW(NULL, spec.n_total);
805 if (result == NULL)
806 goto done;
807
Eric Smith5dce7e92008-06-24 11:11:59 +0000808 /* Fill in the non-digit parts (padding, sign, etc.) */
Eric Smitha5fa5a22008-07-16 00:11:49 +0000809 fill_non_digits(STRINGLIB_STR(result), &spec, NULL, n_digits,
Eric Smith5dce7e92008-06-24 11:11:59 +0000810 format->fill_char == '\0' ? ' ' : format->fill_char);
Eric Smitha9f7d622008-02-17 19:46:49 +0000811
812 /* fill in the digit parts */
813 memmove(STRINGLIB_STR(result) +
814 (spec.n_lpadding + spec.n_lsign + spec.n_spadding),
815 p,
816 n_digits * sizeof(STRINGLIB_CHAR));
817
818done:
819 return result;
820}
Eric Smitha9f7d622008-02-17 19:46:49 +0000821#endif /* FORMAT_FLOAT */
822
823/************************************************************************/
824/*********** built in formatters ****************************************/
825/************************************************************************/
Eric Smitha9f7d622008-02-17 19:46:49 +0000826PyObject *
Eric Smithdc13b792008-05-30 18:10:04 +0000827FORMAT_STRING(PyObject *obj,
828 STRINGLIB_CHAR *format_spec,
829 Py_ssize_t format_spec_len)
Eric Smitha9f7d622008-02-17 19:46:49 +0000830{
Eric Smitha9f7d622008-02-17 19:46:49 +0000831 InternalFormatSpec format;
Eric Smithdc13b792008-05-30 18:10:04 +0000832 PyObject *result = NULL;
Eric Smitha9f7d622008-02-17 19:46:49 +0000833
834 /* check for the special case of zero length format spec, make
Eric Smithdc13b792008-05-30 18:10:04 +0000835 it equivalent to str(obj) */
836 if (format_spec_len == 0) {
837 result = STRINGLIB_TOSTR(obj);
Eric Smitha9f7d622008-02-17 19:46:49 +0000838 goto done;
839 }
840
Eric Smitha9f7d622008-02-17 19:46:49 +0000841 /* parse the format_spec */
Eric Smithdc13b792008-05-30 18:10:04 +0000842 if (!parse_internal_render_format_spec(format_spec, format_spec_len,
843 &format, 's'))
Eric Smitha9f7d622008-02-17 19:46:49 +0000844 goto done;
845
846 /* type conversion? */
847 switch (format.type) {
848 case 's':
849 /* no type conversion needed, already a string. do the formatting */
Eric Smithdc13b792008-05-30 18:10:04 +0000850 result = format_string_internal(obj, &format);
Eric Smitha9f7d622008-02-17 19:46:49 +0000851 break;
852 default:
853 /* unknown */
Martin v. Löwisd918e4e2008-04-07 03:08:28 +0000854 #if STRINGLIB_IS_UNICODE
855 /* If STRINGLIB_CHAR is Py_UNICODE, %c might be out-of-range,
856 hence the two cases. If it is char, gcc complains that the
857 condition below is always true, hence the ifdef. */
858 if (format.type > 32 && format.type <128)
859 #endif
860 PyErr_Format(PyExc_ValueError, "Unknown conversion type %c",
861 (char)format.type);
862 #if STRINGLIB_IS_UNICODE
863 else
864 PyErr_Format(PyExc_ValueError, "Unknown conversion type '\\x%x'",
865 (unsigned int)format.type);
866 #endif
Eric Smitha9f7d622008-02-17 19:46:49 +0000867 goto done;
868 }
869
870done:
Eric Smitha9f7d622008-02-17 19:46:49 +0000871 return result;
872}
Eric Smitha9f7d622008-02-17 19:46:49 +0000873
874#if defined FORMAT_LONG || defined FORMAT_INT
875static PyObject*
Eric Smithdc13b792008-05-30 18:10:04 +0000876format_int_or_long(PyObject* obj,
877 STRINGLIB_CHAR *format_spec,
878 Py_ssize_t format_spec_len,
879 IntOrLongToString tostring)
Eric Smitha9f7d622008-02-17 19:46:49 +0000880{
Eric Smitha9f7d622008-02-17 19:46:49 +0000881 PyObject *result = NULL;
882 PyObject *tmp = NULL;
883 InternalFormatSpec format;
884
Eric Smitha9f7d622008-02-17 19:46:49 +0000885 /* check for the special case of zero length format spec, make
Eric Smithdc13b792008-05-30 18:10:04 +0000886 it equivalent to str(obj) */
887 if (format_spec_len == 0) {
888 result = STRINGLIB_TOSTR(obj);
Eric Smitha9f7d622008-02-17 19:46:49 +0000889 goto done;
890 }
891
892 /* parse the format_spec */
Eric Smithdc13b792008-05-30 18:10:04 +0000893 if (!parse_internal_render_format_spec(format_spec,
894 format_spec_len,
895 &format, 'd'))
Eric Smitha9f7d622008-02-17 19:46:49 +0000896 goto done;
897
898 /* type conversion? */
899 switch (format.type) {
900 case 'b':
901 case 'c':
902 case 'd':
903 case 'o':
904 case 'x':
905 case 'X':
Eric Smithcf537ff2008-05-11 19:52:48 +0000906 case 'n':
Eric Smitha9f7d622008-02-17 19:46:49 +0000907 /* no type conversion needed, already an int (or long). do
908 the formatting */
Eric Smithdc13b792008-05-30 18:10:04 +0000909 result = format_int_or_long_internal(obj, &format, tostring);
Eric Smitha9f7d622008-02-17 19:46:49 +0000910 break;
911
912 case 'e':
913 case 'E':
914 case 'f':
915 case 'F':
916 case 'g':
917 case 'G':
Eric Smitha9f7d622008-02-17 19:46:49 +0000918 case '%':
919 /* convert to float */
Eric Smithdc13b792008-05-30 18:10:04 +0000920 tmp = PyNumber_Float(obj);
Eric Smitha9f7d622008-02-17 19:46:49 +0000921 if (tmp == NULL)
922 goto done;
Eric Smithdc13b792008-05-30 18:10:04 +0000923 result = format_float_internal(obj, &format);
Eric Smitha9f7d622008-02-17 19:46:49 +0000924 break;
925
926 default:
927 /* unknown */
928 PyErr_Format(PyExc_ValueError, "Unknown conversion type %c",
929 format.type);
930 goto done;
931 }
932
933done:
934 Py_XDECREF(tmp);
935 return result;
936}
937#endif /* FORMAT_LONG || defined FORMAT_INT */
938
939#ifdef FORMAT_LONG
940/* Need to define long_format as a function that will convert a long
941 to a string. In 3.0, _PyLong_Format has the correct signature. In
942 2.x, we need to fudge a few parameters */
943#if PY_VERSION_HEX >= 0x03000000
944#define long_format _PyLong_Format
945#else
946static PyObject*
947long_format(PyObject* value, int base)
948{
949 /* Convert to base, don't add trailing 'L', and use the new octal
950 format. We already know this is a long object */
951 assert(PyLong_Check(value));
952 /* convert to base, don't add 'L', and use the new octal format */
953 return _PyLong_Format(value, base, 0, 1);
954}
955#endif
956
957PyObject *
Eric Smithdc13b792008-05-30 18:10:04 +0000958FORMAT_LONG(PyObject *obj,
959 STRINGLIB_CHAR *format_spec,
960 Py_ssize_t format_spec_len)
Eric Smitha9f7d622008-02-17 19:46:49 +0000961{
Eric Smithdc13b792008-05-30 18:10:04 +0000962 return format_int_or_long(obj, format_spec, format_spec_len,
963 long_format);
Eric Smitha9f7d622008-02-17 19:46:49 +0000964}
965#endif /* FORMAT_LONG */
966
967#ifdef FORMAT_INT
968/* this is only used for 2.x, not 3.0 */
969static PyObject*
970int_format(PyObject* value, int base)
971{
972 /* Convert to base, and use the new octal format. We already
973 know this is an int object */
974 assert(PyInt_Check(value));
975 return _PyInt_Format((PyIntObject*)value, base, 1);
976}
977
978PyObject *
Eric Smithdc13b792008-05-30 18:10:04 +0000979FORMAT_INT(PyObject *obj,
980 STRINGLIB_CHAR *format_spec,
981 Py_ssize_t format_spec_len)
Eric Smitha9f7d622008-02-17 19:46:49 +0000982{
Eric Smithdc13b792008-05-30 18:10:04 +0000983 return format_int_or_long(obj, format_spec, format_spec_len,
984 int_format);
Eric Smitha9f7d622008-02-17 19:46:49 +0000985}
986#endif /* FORMAT_INT */
987
988#ifdef FORMAT_FLOAT
989PyObject *
Eric Smithdc13b792008-05-30 18:10:04 +0000990FORMAT_FLOAT(PyObject *obj,
991 STRINGLIB_CHAR *format_spec,
992 Py_ssize_t format_spec_len)
Eric Smitha9f7d622008-02-17 19:46:49 +0000993{
Eric Smitha9f7d622008-02-17 19:46:49 +0000994 PyObject *result = NULL;
995 InternalFormatSpec format;
996
Eric Smitha9f7d622008-02-17 19:46:49 +0000997 /* check for the special case of zero length format spec, make
Eric Smithdc13b792008-05-30 18:10:04 +0000998 it equivalent to str(obj) */
999 if (format_spec_len == 0) {
1000 result = STRINGLIB_TOSTR(obj);
Eric Smitha9f7d622008-02-17 19:46:49 +00001001 goto done;
1002 }
1003
1004 /* parse the format_spec */
Eric Smithdc13b792008-05-30 18:10:04 +00001005 if (!parse_internal_render_format_spec(format_spec,
1006 format_spec_len,
1007 &format, '\0'))
Eric Smitha9f7d622008-02-17 19:46:49 +00001008 goto done;
1009
1010 /* type conversion? */
1011 switch (format.type) {
Eric Smith8113ca62008-03-17 11:01:01 +00001012 case '\0':
1013 /* 'Z' means like 'g', but with at least one decimal. See
1014 PyOS_ascii_formatd */
1015 format.type = 'Z';
1016 /* Deliberate fall through to the next case statement */
Eric Smitha9f7d622008-02-17 19:46:49 +00001017 case 'e':
1018 case 'E':
1019 case 'f':
1020 case 'F':
1021 case 'g':
1022 case 'G':
1023 case 'n':
1024 case '%':
1025 /* no conversion, already a float. do the formatting */
Eric Smithdc13b792008-05-30 18:10:04 +00001026 result = format_float_internal(obj, &format);
Eric Smitha9f7d622008-02-17 19:46:49 +00001027 break;
1028
1029 default:
1030 /* unknown */
1031 PyErr_Format(PyExc_ValueError, "Unknown conversion type %c",
1032 format.type);
1033 goto done;
1034 }
1035
1036done:
1037 return result;
1038}
1039#endif /* FORMAT_FLOAT */