blob: 86235a64b508ab292bb741b3f855fde0ea3faef1 [file] [log] [blame]
/* Implements the string, long, and float formatters, that is,
   string.__format__, etc. */
3
/* Before including this, you must include either:
   stringlib/unicodedefs.h
   stringlib/stringdefs.h

   Also, you should define the names:
   FORMAT_STRING
   FORMAT_LONG
   FORMAT_FLOAT
   to be whatever you want the public names of these functions to
   be.  These are the only non-static functions defined here.
*/
15
/* Compile-time switch.  When non-zero, '(' is accepted as a sign element,
   a ')' following it in the format spec is consumed, and negative numbers
   are wrapped in parentheses.  Currently disabled. */
#define ALLOW_PARENS_FOR_SIGN 0
17
Eric Smithe9fb6862009-02-20 14:02:36 +000018/* Raises an exception about an unknown presentation type for this
19 * type. */
20
21static void
22unknown_presentation_type(STRINGLIB_CHAR presentation_type,
23 const char* type_name)
24{
25#if STRINGLIB_IS_UNICODE
26 /* If STRINGLIB_CHAR is Py_UNICODE, %c might be out-of-range,
27 hence the two cases. If it is char, gcc complains that the
28 condition below is always true, hence the ifdef. */
29 if (presentation_type > 32 && presentation_type < 128)
30#endif
31 PyErr_Format(PyExc_ValueError,
32 "Unknown format code '%c' "
33 "for object of type '%.200s'",
34 presentation_type,
35 type_name);
36#if STRINGLIB_IS_UNICODE
37 else
38 PyErr_Format(PyExc_ValueError,
39 "Unknown format code '\\x%x' "
40 "for object of type '%.200s'",
41 (unsigned int)presentation_type,
42 type_name);
43#endif
44}
45
Eric Smitha9f7d622008-02-17 19:46:49 +000046/*
47 get_integer consumes 0 or more decimal digit characters from an
48 input string, updates *result with the corresponding positive
49 integer, and returns the number of digits consumed.
50
51 returns -1 on error.
52*/
53static int
54get_integer(STRINGLIB_CHAR **ptr, STRINGLIB_CHAR *end,
55 Py_ssize_t *result)
56{
57 Py_ssize_t accumulator, digitval, oldaccumulator;
58 int numdigits;
59 accumulator = numdigits = 0;
60 for (;;(*ptr)++, numdigits++) {
61 if (*ptr >= end)
62 break;
63 digitval = STRINGLIB_TODECIMAL(**ptr);
64 if (digitval < 0)
65 break;
66 /*
67 This trick was copied from old Unicode format code. It's cute,
68 but would really suck on an old machine with a slow divide
69 implementation. Fortunately, in the normal case we do not
70 expect too many digits.
71 */
72 oldaccumulator = accumulator;
73 accumulator *= 10;
74 if ((accumulator+10)/10 != oldaccumulator+1) {
75 PyErr_Format(PyExc_ValueError,
76 "Too many decimal digits in format string");
77 return -1;
78 }
79 accumulator += digitval;
80 }
81 *result = accumulator;
82 return numdigits;
83}
84
85/************************************************************************/
86/*********** standard format specifier parsing **************************/
87/************************************************************************/
88
89/* returns true if this character is a specifier alignment token */
90Py_LOCAL_INLINE(int)
91is_alignment_token(STRINGLIB_CHAR c)
92{
93 switch (c) {
94 case '<': case '>': case '=': case '^':
95 return 1;
96 default:
97 return 0;
98 }
99}
100
101/* returns true if this character is a sign element */
102Py_LOCAL_INLINE(int)
103is_sign_element(STRINGLIB_CHAR c)
104{
105 switch (c) {
106 case ' ': case '+': case '-':
107#if ALLOW_PARENS_FOR_SIGN
108 case '(':
109#endif
110 return 1;
111 default:
112 return 0;
113 }
114}
115
116
117typedef struct {
118 STRINGLIB_CHAR fill_char;
119 STRINGLIB_CHAR align;
Eric Smithd0c84122008-07-15 10:10:07 +0000120 int alternate;
Eric Smitha9f7d622008-02-17 19:46:49 +0000121 STRINGLIB_CHAR sign;
122 Py_ssize_t width;
123 Py_ssize_t precision;
124 STRINGLIB_CHAR type;
125} InternalFormatSpec;
126
127/*
128 ptr points to the start of the format_spec, end points just past its end.
129 fills in format with the parsed information.
130 returns 1 on success, 0 on failure.
131 if failure, sets the exception
132*/
133static int
Eric Smithdc13b792008-05-30 18:10:04 +0000134parse_internal_render_format_spec(STRINGLIB_CHAR *format_spec,
135 Py_ssize_t format_spec_len,
Eric Smitha9f7d622008-02-17 19:46:49 +0000136 InternalFormatSpec *format,
137 char default_type)
138{
Eric Smithdc13b792008-05-30 18:10:04 +0000139 STRINGLIB_CHAR *ptr = format_spec;
140 STRINGLIB_CHAR *end = format_spec + format_spec_len;
Eric Smitha9f7d622008-02-17 19:46:49 +0000141
142 /* end-ptr is used throughout this code to specify the length of
143 the input string */
144
145 Py_ssize_t specified_width;
146
147 format->fill_char = '\0';
148 format->align = '\0';
Eric Smithd0c84122008-07-15 10:10:07 +0000149 format->alternate = 0;
Eric Smitha9f7d622008-02-17 19:46:49 +0000150 format->sign = '\0';
151 format->width = -1;
152 format->precision = -1;
153 format->type = default_type;
154
155 /* If the second char is an alignment token,
156 then parse the fill char */
157 if (end-ptr >= 2 && is_alignment_token(ptr[1])) {
158 format->align = ptr[1];
159 format->fill_char = ptr[0];
160 ptr += 2;
161 }
162 else if (end-ptr >= 1 && is_alignment_token(ptr[0])) {
163 format->align = ptr[0];
Eric Smith8a803dd2008-02-20 23:39:28 +0000164 ++ptr;
Eric Smitha9f7d622008-02-17 19:46:49 +0000165 }
166
167 /* Parse the various sign options */
168 if (end-ptr >= 1 && is_sign_element(ptr[0])) {
169 format->sign = ptr[0];
Eric Smith8a803dd2008-02-20 23:39:28 +0000170 ++ptr;
Eric Smitha9f7d622008-02-17 19:46:49 +0000171#if ALLOW_PARENS_FOR_SIGN
172 if (end-ptr >= 1 && ptr[0] == ')') {
Eric Smith8a803dd2008-02-20 23:39:28 +0000173 ++ptr;
Eric Smitha9f7d622008-02-17 19:46:49 +0000174 }
175#endif
176 }
177
Eric Smitha5fa5a22008-07-16 00:11:49 +0000178 /* If the next character is #, we're in alternate mode. This only
179 applies to integers. */
180 if (end-ptr >= 1 && ptr[0] == '#') {
181 format->alternate = 1;
182 ++ptr;
183 }
184
Eric Smitha9f7d622008-02-17 19:46:49 +0000185 /* The special case for 0-padding (backwards compat) */
186 if (format->fill_char == '\0' && end-ptr >= 1 && ptr[0] == '0') {
187 format->fill_char = '0';
188 if (format->align == '\0') {
189 format->align = '=';
190 }
Eric Smith8a803dd2008-02-20 23:39:28 +0000191 ++ptr;
Eric Smitha9f7d622008-02-17 19:46:49 +0000192 }
193
194 /* XXX add error checking */
195 specified_width = get_integer(&ptr, end, &format->width);
196
197 /* if specified_width is 0, we didn't consume any characters for
198 the width. in that case, reset the width to -1, because
199 get_integer() will have set it to zero */
200 if (specified_width == 0) {
201 format->width = -1;
202 }
203
204 /* Parse field precision */
205 if (end-ptr && ptr[0] == '.') {
Eric Smith8a803dd2008-02-20 23:39:28 +0000206 ++ptr;
Eric Smitha9f7d622008-02-17 19:46:49 +0000207
208 /* XXX add error checking */
209 specified_width = get_integer(&ptr, end, &format->precision);
210
211 /* not having a precision after a dot is an error */
212 if (specified_width == 0) {
213 PyErr_Format(PyExc_ValueError,
214 "Format specifier missing precision");
215 return 0;
216 }
217
218 }
219
220 /* Finally, parse the type field */
221
222 if (end-ptr > 1) {
223 /* invalid conversion spec */
224 PyErr_Format(PyExc_ValueError, "Invalid conversion specification");
225 return 0;
226 }
227
228 if (end-ptr == 1) {
229 format->type = ptr[0];
Eric Smith8a803dd2008-02-20 23:39:28 +0000230 ++ptr;
Eric Smitha9f7d622008-02-17 19:46:49 +0000231 }
232
233 return 1;
234}
235
236#if defined FORMAT_FLOAT || defined FORMAT_LONG
237/************************************************************************/
238/*********** common routines for numeric formatting *********************/
239/************************************************************************/
240
241/* describes the layout for an integer, see the comment in
Eric Smitha5fa5a22008-07-16 00:11:49 +0000242 calc_number_widths() for details */
Eric Smitha9f7d622008-02-17 19:46:49 +0000243typedef struct {
244 Py_ssize_t n_lpadding;
Eric Smitha5fa5a22008-07-16 00:11:49 +0000245 Py_ssize_t n_prefix;
Eric Smitha9f7d622008-02-17 19:46:49 +0000246 Py_ssize_t n_spadding;
247 Py_ssize_t n_rpadding;
248 char lsign;
249 Py_ssize_t n_lsign;
250 char rsign;
251 Py_ssize_t n_rsign;
252 Py_ssize_t n_total; /* just a convenience, it's derivable from the
253 other fields */
254} NumberFieldWidths;
255
256/* not all fields of format are used. for example, precision is
257 unused. should this take discrete params in order to be more clear
258 about what it does? or is passing a single format parameter easier
259 and more efficient enough to justify a little obfuscation? */
260static void
Eric Smith4cb965c2008-07-16 18:29:51 +0000261calc_number_widths(NumberFieldWidths *spec, STRINGLIB_CHAR actual_sign,
Eric Smithd0c84122008-07-15 10:10:07 +0000262 Py_ssize_t n_prefix, Py_ssize_t n_digits,
263 const InternalFormatSpec *format)
Eric Smitha9f7d622008-02-17 19:46:49 +0000264{
Eric Smith4cb965c2008-07-16 18:29:51 +0000265 spec->n_lpadding = 0;
266 spec->n_prefix = 0;
267 spec->n_spadding = 0;
268 spec->n_rpadding = 0;
269 spec->lsign = '\0';
270 spec->n_lsign = 0;
271 spec->rsign = '\0';
272 spec->n_rsign = 0;
Eric Smitha9f7d622008-02-17 19:46:49 +0000273
274 /* the output will look like:
Eric Smithd0c84122008-07-15 10:10:07 +0000275 | |
276 | <lpadding> <lsign> <prefix> <spadding> <digits> <rsign> <rpadding> |
277 | |
Eric Smitha9f7d622008-02-17 19:46:49 +0000278
279 lsign and rsign are computed from format->sign and the actual
280 sign of the number
281
Eric Smithd0c84122008-07-15 10:10:07 +0000282 prefix is given (it's for the '0x' prefix)
283
Eric Smitha9f7d622008-02-17 19:46:49 +0000284 digits is already known
285
286 the total width is either given, or computed from the
287 actual digits
288
289 only one of lpadding, spadding, and rpadding can be non-zero,
290 and it's calculated from the width and other fields
291 */
292
293 /* compute the various parts we're going to write */
294 if (format->sign == '+') {
295 /* always put a + or - */
Eric Smith4cb965c2008-07-16 18:29:51 +0000296 spec->n_lsign = 1;
297 spec->lsign = (actual_sign == '-' ? '-' : '+');
Eric Smitha9f7d622008-02-17 19:46:49 +0000298 }
299#if ALLOW_PARENS_FOR_SIGN
300 else if (format->sign == '(') {
301 if (actual_sign == '-') {
Eric Smith4cb965c2008-07-16 18:29:51 +0000302 spec->n_lsign = 1;
303 spec->lsign = '(';
304 spec->n_rsign = 1;
305 spec->rsign = ')';
Eric Smitha9f7d622008-02-17 19:46:49 +0000306 }
307 }
308#endif
309 else if (format->sign == ' ') {
Eric Smith4cb965c2008-07-16 18:29:51 +0000310 spec->n_lsign = 1;
311 spec->lsign = (actual_sign == '-' ? '-' : ' ');
Eric Smitha9f7d622008-02-17 19:46:49 +0000312 }
313 else {
314 /* non specified, or the default (-) */
315 if (actual_sign == '-') {
Eric Smith4cb965c2008-07-16 18:29:51 +0000316 spec->n_lsign = 1;
317 spec->lsign = '-';
Eric Smitha9f7d622008-02-17 19:46:49 +0000318 }
319 }
320
Eric Smith4cb965c2008-07-16 18:29:51 +0000321 spec->n_prefix = n_prefix;
Eric Smitha5fa5a22008-07-16 00:11:49 +0000322
Eric Smitha9f7d622008-02-17 19:46:49 +0000323 /* now the number of padding characters */
324 if (format->width == -1) {
325 /* no padding at all, nothing to do */
326 }
327 else {
328 /* see if any padding is needed */
Eric Smith4cb965c2008-07-16 18:29:51 +0000329 if (spec->n_lsign + n_digits + spec->n_rsign +
330 spec->n_prefix >= format->width) {
Eric Smitha9f7d622008-02-17 19:46:49 +0000331 /* no padding needed, we're already bigger than the
332 requested width */
333 }
334 else {
335 /* determine which of left, space, or right padding is
336 needed */
337 Py_ssize_t padding = format->width -
Eric Smith4cb965c2008-07-16 18:29:51 +0000338 (spec->n_lsign + spec->n_prefix +
339 n_digits + spec->n_rsign);
Eric Smitha9f7d622008-02-17 19:46:49 +0000340 if (format->align == '<')
Eric Smith4cb965c2008-07-16 18:29:51 +0000341 spec->n_rpadding = padding;
Eric Smitha9f7d622008-02-17 19:46:49 +0000342 else if (format->align == '>')
Eric Smith4cb965c2008-07-16 18:29:51 +0000343 spec->n_lpadding = padding;
Eric Smitha9f7d622008-02-17 19:46:49 +0000344 else if (format->align == '^') {
Eric Smith4cb965c2008-07-16 18:29:51 +0000345 spec->n_lpadding = padding / 2;
346 spec->n_rpadding = padding - spec->n_lpadding;
Eric Smitha9f7d622008-02-17 19:46:49 +0000347 }
348 else if (format->align == '=')
Eric Smith4cb965c2008-07-16 18:29:51 +0000349 spec->n_spadding = padding;
Eric Smitha9f7d622008-02-17 19:46:49 +0000350 else
Eric Smith4cb965c2008-07-16 18:29:51 +0000351 spec->n_lpadding = padding;
Eric Smitha9f7d622008-02-17 19:46:49 +0000352 }
353 }
Eric Smith4cb965c2008-07-16 18:29:51 +0000354 spec->n_total = spec->n_lpadding + spec->n_lsign + spec->n_prefix +
355 spec->n_spadding + n_digits + spec->n_rsign + spec->n_rpadding;
Eric Smitha9f7d622008-02-17 19:46:49 +0000356}
357
358/* fill in the non-digit parts of a numbers's string representation,
Eric Smitha5fa5a22008-07-16 00:11:49 +0000359 as determined in calc_number_widths(). returns the pointer to
Eric Smitha9f7d622008-02-17 19:46:49 +0000360 where the digits go. */
361static STRINGLIB_CHAR *
Eric Smith5dce7e92008-06-24 11:11:59 +0000362fill_non_digits(STRINGLIB_CHAR *p_buf, const NumberFieldWidths *spec,
Eric Smitha5fa5a22008-07-16 00:11:49 +0000363 STRINGLIB_CHAR *prefix, Py_ssize_t n_digits,
364 STRINGLIB_CHAR fill_char)
Eric Smitha9f7d622008-02-17 19:46:49 +0000365{
Eric Smitha5fa5a22008-07-16 00:11:49 +0000366 STRINGLIB_CHAR *p_digits;
Eric Smitha9f7d622008-02-17 19:46:49 +0000367
368 if (spec->n_lpadding) {
369 STRINGLIB_FILL(p_buf, fill_char, spec->n_lpadding);
370 p_buf += spec->n_lpadding;
371 }
372 if (spec->n_lsign == 1) {
373 *p_buf++ = spec->lsign;
374 }
Eric Smitha5fa5a22008-07-16 00:11:49 +0000375 if (spec->n_prefix) {
376 memmove(p_buf,
377 prefix,
378 spec->n_prefix * sizeof(STRINGLIB_CHAR));
379 p_buf += spec->n_prefix;
380 }
Eric Smitha9f7d622008-02-17 19:46:49 +0000381 if (spec->n_spadding) {
382 STRINGLIB_FILL(p_buf, fill_char, spec->n_spadding);
383 p_buf += spec->n_spadding;
384 }
385 p_digits = p_buf;
386 p_buf += n_digits;
387 if (spec->n_rsign == 1) {
388 *p_buf++ = spec->rsign;
389 }
390 if (spec->n_rpadding) {
391 STRINGLIB_FILL(p_buf, fill_char, spec->n_rpadding);
392 p_buf += spec->n_rpadding;
393 }
394 return p_digits;
395}
396#endif /* FORMAT_FLOAT || FORMAT_LONG */
397
398/************************************************************************/
399/*********** string formatting ******************************************/
400/************************************************************************/
401
402static PyObject *
403format_string_internal(PyObject *value, const InternalFormatSpec *format)
404{
405 Py_ssize_t width; /* total field width */
406 Py_ssize_t lpad;
407 STRINGLIB_CHAR *dst;
408 STRINGLIB_CHAR *src = STRINGLIB_STR(value);
409 Py_ssize_t len = STRINGLIB_LEN(value);
410 PyObject *result = NULL;
411
412 /* sign is not allowed on strings */
413 if (format->sign != '\0') {
414 PyErr_SetString(PyExc_ValueError,
415 "Sign not allowed in string format specifier");
416 goto done;
417 }
418
Eric Smithd0c84122008-07-15 10:10:07 +0000419 /* alternate is not allowed on strings */
420 if (format->alternate) {
421 PyErr_SetString(PyExc_ValueError,
422 "Alternate form (#) not allowed in string format "
423 "specifier");
424 goto done;
425 }
426
Eric Smitha9f7d622008-02-17 19:46:49 +0000427 /* '=' alignment not allowed on strings */
428 if (format->align == '=') {
429 PyErr_SetString(PyExc_ValueError,
430 "'=' alignment not allowed "
431 "in string format specifier");
432 goto done;
433 }
434
435 /* if precision is specified, output no more that format.precision
436 characters */
437 if (format->precision >= 0 && len >= format->precision) {
438 len = format->precision;
439 }
440
441 if (format->width >= 0) {
442 width = format->width;
443
444 /* but use at least len characters */
445 if (len > width) {
446 width = len;
447 }
448 }
449 else {
450 /* not specified, use all of the chars and no more */
451 width = len;
452 }
453
454 /* allocate the resulting string */
455 result = STRINGLIB_NEW(NULL, width);
456 if (result == NULL)
457 goto done;
458
459 /* now write into that space */
460 dst = STRINGLIB_STR(result);
461
462 /* figure out how much leading space we need, based on the
463 aligning */
464 if (format->align == '>')
465 lpad = width - len;
466 else if (format->align == '^')
467 lpad = (width - len) / 2;
468 else
469 lpad = 0;
470
471 /* if right aligning, increment the destination allow space on the
472 left */
473 memcpy(dst + lpad, src, len * sizeof(STRINGLIB_CHAR));
474
475 /* do any padding */
476 if (width > len) {
477 STRINGLIB_CHAR fill_char = format->fill_char;
478 if (fill_char == '\0') {
479 /* use the default, if not specified */
480 fill_char = ' ';
481 }
482
483 /* pad on left */
484 if (lpad)
485 STRINGLIB_FILL(dst, fill_char, lpad);
486
487 /* pad on right */
488 if (width - len - lpad)
489 STRINGLIB_FILL(dst + len + lpad, fill_char, width - len - lpad);
490 }
491
492done:
493 return result;
494}
495
496
497/************************************************************************/
498/*********** long formatting ********************************************/
499/************************************************************************/
500
501#if defined FORMAT_LONG || defined FORMAT_INT
502typedef PyObject*
503(*IntOrLongToString)(PyObject *value, int base);
504
505static PyObject *
506format_int_or_long_internal(PyObject *value, const InternalFormatSpec *format,
507 IntOrLongToString tostring)
508{
509 PyObject *result = NULL;
510 PyObject *tmp = NULL;
511 STRINGLIB_CHAR *pnumeric_chars;
512 STRINGLIB_CHAR numeric_char;
513 STRINGLIB_CHAR sign = '\0';
514 STRINGLIB_CHAR *p;
515 Py_ssize_t n_digits; /* count of digits need from the computed
516 string */
517 Py_ssize_t n_leading_chars;
Eric Smithcf537ff2008-05-11 19:52:48 +0000518 Py_ssize_t n_grouping_chars = 0; /* Count of additional chars to
519 allocate, used for 'n'
520 formatting. */
Eric Smitha5fa5a22008-07-16 00:11:49 +0000521 Py_ssize_t n_prefix = 0; /* Count of prefix chars, (e.g., '0x') */
522 STRINGLIB_CHAR *prefix = NULL;
Eric Smitha9f7d622008-02-17 19:46:49 +0000523 NumberFieldWidths spec;
524 long x;
525
526 /* no precision allowed on integers */
527 if (format->precision != -1) {
528 PyErr_SetString(PyExc_ValueError,
529 "Precision not allowed in integer format specifier");
530 goto done;
531 }
532
533
534 /* special case for character formatting */
535 if (format->type == 'c') {
536 /* error to specify a sign */
537 if (format->sign != '\0') {
538 PyErr_SetString(PyExc_ValueError,
539 "Sign not allowed with integer"
540 " format specifier 'c'");
541 goto done;
542 }
543
544 /* taken from unicodeobject.c formatchar() */
545 /* Integer input truncated to a character */
546/* XXX: won't work for int */
547 x = PyLong_AsLong(value);
548 if (x == -1 && PyErr_Occurred())
549 goto done;
550#ifdef Py_UNICODE_WIDE
551 if (x < 0 || x > 0x10ffff) {
552 PyErr_SetString(PyExc_OverflowError,
553 "%c arg not in range(0x110000) "
554 "(wide Python build)");
555 goto done;
556 }
557#else
558 if (x < 0 || x > 0xffff) {
559 PyErr_SetString(PyExc_OverflowError,
560 "%c arg not in range(0x10000) "
561 "(narrow Python build)");
562 goto done;
563 }
564#endif
565 numeric_char = (STRINGLIB_CHAR)x;
566 pnumeric_chars = &numeric_char;
567 n_digits = 1;
568 }
569 else {
570 int base;
Eric Smithd0c84122008-07-15 10:10:07 +0000571 int leading_chars_to_skip = 0; /* Number of characters added by
572 PyNumber_ToBase that we want to
573 skip over. */
Eric Smitha9f7d622008-02-17 19:46:49 +0000574
575 /* Compute the base and how many characters will be added by
576 PyNumber_ToBase */
577 switch (format->type) {
578 case 'b':
579 base = 2;
Eric Smitha5fa5a22008-07-16 00:11:49 +0000580 leading_chars_to_skip = 2; /* 0b */
Eric Smitha9f7d622008-02-17 19:46:49 +0000581 break;
582 case 'o':
583 base = 8;
Eric Smitha5fa5a22008-07-16 00:11:49 +0000584 leading_chars_to_skip = 2; /* 0o */
Eric Smitha9f7d622008-02-17 19:46:49 +0000585 break;
586 case 'x':
587 case 'X':
588 base = 16;
Eric Smitha5fa5a22008-07-16 00:11:49 +0000589 leading_chars_to_skip = 2; /* 0x */
Eric Smitha9f7d622008-02-17 19:46:49 +0000590 break;
591 default: /* shouldn't be needed, but stops a compiler warning */
592 case 'd':
Eric Smithcf537ff2008-05-11 19:52:48 +0000593 case 'n':
Eric Smitha9f7d622008-02-17 19:46:49 +0000594 base = 10;
Eric Smitha9f7d622008-02-17 19:46:49 +0000595 break;
596 }
597
Eric Smitha5fa5a22008-07-16 00:11:49 +0000598 /* The number of prefix chars is the same as the leading
599 chars to skip */
600 if (format->alternate)
601 n_prefix = leading_chars_to_skip;
602
Eric Smitha9f7d622008-02-17 19:46:49 +0000603 /* Do the hard part, converting to a string in a given base */
604 tmp = tostring(value, base);
605 if (tmp == NULL)
606 goto done;
607
608 pnumeric_chars = STRINGLIB_STR(tmp);
609 n_digits = STRINGLIB_LEN(tmp);
610
Eric Smitha5fa5a22008-07-16 00:11:49 +0000611 prefix = pnumeric_chars;
612
Eric Smitha9f7d622008-02-17 19:46:49 +0000613 /* Remember not to modify what pnumeric_chars points to. it
614 might be interned. Only modify it after we copy it into a
615 newly allocated output buffer. */
616
617 /* Is a sign character present in the output? If so, remember it
618 and skip it */
619 sign = pnumeric_chars[0];
620 if (sign == '-') {
Eric Smitha5fa5a22008-07-16 00:11:49 +0000621 ++prefix;
Eric Smitha9f7d622008-02-17 19:46:49 +0000622 ++leading_chars_to_skip;
623 }
624
625 /* Skip over the leading chars (0x, 0b, etc.) */
626 n_digits -= leading_chars_to_skip;
627 pnumeric_chars += leading_chars_to_skip;
628 }
629
Eric Smithcf537ff2008-05-11 19:52:48 +0000630 if (format->type == 'n')
631 /* Compute how many additional chars we need to allocate
632 to hold the thousands grouping. */
Eric Smith65fe47b2008-06-24 00:42:10 +0000633 STRINGLIB_GROUPING(NULL, n_digits, n_digits,
Eric Smithcf537ff2008-05-11 19:52:48 +0000634 0, &n_grouping_chars, 0);
635
Eric Smith5dce7e92008-06-24 11:11:59 +0000636 /* Calculate the widths of the various leading and trailing parts */
Eric Smitha5fa5a22008-07-16 00:11:49 +0000637 calc_number_widths(&spec, sign, n_prefix, n_digits + n_grouping_chars,
638 format);
Eric Smith5dce7e92008-06-24 11:11:59 +0000639
Eric Smitha9f7d622008-02-17 19:46:49 +0000640 /* Allocate a new string to hold the result */
Eric Smith5dce7e92008-06-24 11:11:59 +0000641 result = STRINGLIB_NEW(NULL, spec.n_total);
Eric Smitha9f7d622008-02-17 19:46:49 +0000642 if (!result)
643 goto done;
644 p = STRINGLIB_STR(result);
645
Eric Smitha5fa5a22008-07-16 00:11:49 +0000646 /* XXX There is too much magic here regarding the internals of
647 spec and the location of the prefix and digits. It would be
648 better if calc_number_widths returned a number of logical
649 offsets into the buffer, and those were used. Maybe in a
650 future code cleanup. */
651
Eric Smitha9f7d622008-02-17 19:46:49 +0000652 /* Fill in the digit parts */
Eric Smitha5fa5a22008-07-16 00:11:49 +0000653 n_leading_chars = spec.n_lpadding + spec.n_lsign +
654 spec.n_prefix + spec.n_spadding;
Eric Smitha9f7d622008-02-17 19:46:49 +0000655 memmove(p + n_leading_chars,
656 pnumeric_chars,
657 n_digits * sizeof(STRINGLIB_CHAR));
658
Eric Smitha5fa5a22008-07-16 00:11:49 +0000659 /* If type is 'X', convert the filled in digits to uppercase */
Eric Smitha9f7d622008-02-17 19:46:49 +0000660 if (format->type == 'X') {
661 Py_ssize_t t;
Eric Smith8a803dd2008-02-20 23:39:28 +0000662 for (t = 0; t < n_digits; ++t)
Eric Smitha9f7d622008-02-17 19:46:49 +0000663 p[t + n_leading_chars] = STRINGLIB_TOUPPER(p[t + n_leading_chars]);
664 }
665
Eric Smitha5fa5a22008-07-16 00:11:49 +0000666 /* Insert the grouping, if any, after the uppercasing of the digits, so
667 we can ensure that grouping chars won't be affected. */
Eric Smith5dce7e92008-06-24 11:11:59 +0000668 if (n_grouping_chars) {
Eric Smithcf537ff2008-05-11 19:52:48 +0000669 /* We know this can't fail, since we've already
670 reserved enough space. */
671 STRINGLIB_CHAR *pstart = p + n_leading_chars;
Neal Norwitz18aa3882008-08-24 05:04:52 +0000672#ifndef NDEBUG
673 int r =
674#endif
675 STRINGLIB_GROUPING(pstart, n_digits, n_digits,
Eric Smitha5fa5a22008-07-16 00:11:49 +0000676 spec.n_total+n_grouping_chars-n_leading_chars,
677 NULL, 0);
Eric Smithcf537ff2008-05-11 19:52:48 +0000678 assert(r);
679 }
680
Eric Smith5dce7e92008-06-24 11:11:59 +0000681 /* Fill in the non-digit parts (padding, sign, etc.) */
Eric Smitha5fa5a22008-07-16 00:11:49 +0000682 fill_non_digits(p, &spec, prefix, n_digits + n_grouping_chars,
Eric Smith5dce7e92008-06-24 11:11:59 +0000683 format->fill_char == '\0' ? ' ' : format->fill_char);
Eric Smitha9f7d622008-02-17 19:46:49 +0000684
Eric Smitha5fa5a22008-07-16 00:11:49 +0000685 /* If type is 'X', uppercase the prefix. This has to be done after the
686 prefix is filled in by fill_non_digits */
687 if (format->type == 'X') {
688 Py_ssize_t t;
689 for (t = 0; t < n_prefix; ++t)
690 p[t + spec.n_lpadding + spec.n_lsign] =
691 STRINGLIB_TOUPPER(p[t + spec.n_lpadding + spec.n_lsign]);
692 }
693
694
Eric Smitha9f7d622008-02-17 19:46:49 +0000695done:
696 Py_XDECREF(tmp);
697 return result;
698}
699#endif /* defined FORMAT_LONG || defined FORMAT_INT */
700
701/************************************************************************/
702/*********** float formatting *******************************************/
703/************************************************************************/
704
705#ifdef FORMAT_FLOAT
706#if STRINGLIB_IS_UNICODE
707/* taken from unicodeobject.c */
708static Py_ssize_t
709strtounicode(Py_UNICODE *buffer, const char *charbuffer)
710{
711 register Py_ssize_t i;
712 Py_ssize_t len = strlen(charbuffer);
Eric Smith8a803dd2008-02-20 23:39:28 +0000713 for (i = len - 1; i >= 0; --i)
Eric Smitha9f7d622008-02-17 19:46:49 +0000714 buffer[i] = (Py_UNICODE) charbuffer[i];
715
716 return len;
717}
718#endif
719
/* Size, in characters, of the scratch buffers used by
   format_float_internal(); see FORMATBUFLEN in unicodeobject.c */
#define FLOAT_FORMATBUFLEN 120
722
723/* much of this is taken from unicodeobject.c */
Eric Smitha9f7d622008-02-17 19:46:49 +0000724static PyObject *
Eric Smith8a803dd2008-02-20 23:39:28 +0000725format_float_internal(PyObject *value,
726 const InternalFormatSpec *format)
Eric Smitha9f7d622008-02-17 19:46:49 +0000727{
728 /* fmt = '%.' + `prec` + `type` + '%%'
729 worst case length = 2 + 10 (len of INT_MAX) + 1 + 2 = 15 (use 20)*/
730 char fmt[20];
731
732 /* taken from unicodeobject.c */
733 /* Worst case length calc to ensure no buffer overrun:
734
735 'g' formats:
736 fmt = %#.<prec>g
737 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
738 for any double rep.)
739 len = 1 + prec + 1 + 2 + 5 = 9 + prec
740
741 'f' formats:
742 buf = '-' + [0-9]*x + '.' + [0-9]*prec (with x < 50)
743 len = 1 + 50 + 1 + prec = 52 + prec
744
745 If prec=0 the effective precision is 1 (the leading digit is
746 always given), therefore increase the length by one.
747
748 */
749 char charbuf[FLOAT_FORMATBUFLEN];
750 Py_ssize_t n_digits;
751 double x;
752 Py_ssize_t precision = format->precision;
753 PyObject *result = NULL;
754 STRINGLIB_CHAR sign;
755 char* trailing = "";
756 STRINGLIB_CHAR *p;
757 NumberFieldWidths spec;
Eric Smith8a803dd2008-02-20 23:39:28 +0000758 STRINGLIB_CHAR type = format->type;
Eric Smitha9f7d622008-02-17 19:46:49 +0000759
760#if STRINGLIB_IS_UNICODE
761 Py_UNICODE unicodebuf[FLOAT_FORMATBUFLEN];
762#endif
763
Eric Smithd0c84122008-07-15 10:10:07 +0000764 /* alternate is not allowed on floats. */
765 if (format->alternate) {
766 PyErr_SetString(PyExc_ValueError,
767 "Alternate form (#) not allowed in float format "
768 "specifier");
769 goto done;
770 }
771
Eric Smitha9f7d622008-02-17 19:46:49 +0000772 /* first, do the conversion as 8-bit chars, using the platform's
773 snprintf. then, if needed, convert to unicode. */
774
Eric Smithd6c393a2008-07-17 19:49:47 +0000775 /* 'F' is the same as 'f', per the PEP */
776 if (type == 'F')
777 type = 'f';
778
Eric Smitha9f7d622008-02-17 19:46:49 +0000779 x = PyFloat_AsDouble(value);
780
781 if (x == -1.0 && PyErr_Occurred())
782 goto done;
783
784 if (type == '%') {
785 type = 'f';
786 x *= 100;
787 trailing = "%";
788 }
789
790 if (precision < 0)
791 precision = 6;
Mark Dickinson2e648ec2009-03-29 14:37:51 +0000792 if (type == 'f' && fabs(x) >= 1e50)
Eric Smithd6c393a2008-07-17 19:49:47 +0000793 type = 'g';
Eric Smitha9f7d622008-02-17 19:46:49 +0000794
795 /* cast "type", because if we're in unicode we need to pass a
796 8-bit char. this is safe, because we've restricted what "type"
797 can be */
798 PyOS_snprintf(fmt, sizeof(fmt), "%%.%" PY_FORMAT_SIZE_T "d%c", precision,
799 (char)type);
800
Eric Smith8a803dd2008-02-20 23:39:28 +0000801 /* do the actual formatting */
802 PyOS_ascii_formatd(charbuf, sizeof(charbuf), fmt, x);
Eric Smitha9f7d622008-02-17 19:46:49 +0000803
804 /* adding trailing to fmt with PyOS_snprintf doesn't work, not
805 sure why. we'll just concatentate it here, no harm done. we
806 know we can't have a buffer overflow from the fmt size
807 analysis */
808 strcat(charbuf, trailing);
809
810 /* rather than duplicate the code for snprintf for both unicode
811 and 8 bit strings, we just use the 8 bit version and then
812 convert to unicode in a separate code path. that's probably
813 the lesser of 2 evils. */
814#if STRINGLIB_IS_UNICODE
815 n_digits = strtounicode(unicodebuf, charbuf);
816 p = unicodebuf;
817#else
818 /* compute the length. I believe this is done because the return
819 value from snprintf above is unreliable */
820 n_digits = strlen(charbuf);
821 p = charbuf;
822#endif
823
824 /* is a sign character present in the output? if so, remember it
825 and skip it */
826 sign = p[0];
827 if (sign == '-') {
Eric Smith8a803dd2008-02-20 23:39:28 +0000828 ++p;
829 --n_digits;
Eric Smitha9f7d622008-02-17 19:46:49 +0000830 }
831
Eric Smithd0c84122008-07-15 10:10:07 +0000832 calc_number_widths(&spec, sign, 0, n_digits, format);
Eric Smitha9f7d622008-02-17 19:46:49 +0000833
834 /* allocate a string with enough space */
835 result = STRINGLIB_NEW(NULL, spec.n_total);
836 if (result == NULL)
837 goto done;
838
Eric Smith5dce7e92008-06-24 11:11:59 +0000839 /* Fill in the non-digit parts (padding, sign, etc.) */
Eric Smitha5fa5a22008-07-16 00:11:49 +0000840 fill_non_digits(STRINGLIB_STR(result), &spec, NULL, n_digits,
Eric Smith5dce7e92008-06-24 11:11:59 +0000841 format->fill_char == '\0' ? ' ' : format->fill_char);
Eric Smitha9f7d622008-02-17 19:46:49 +0000842
843 /* fill in the digit parts */
844 memmove(STRINGLIB_STR(result) +
845 (spec.n_lpadding + spec.n_lsign + spec.n_spadding),
846 p,
847 n_digits * sizeof(STRINGLIB_CHAR));
848
849done:
850 return result;
851}
Eric Smitha9f7d622008-02-17 19:46:49 +0000852#endif /* FORMAT_FLOAT */
853
854/************************************************************************/
855/*********** built in formatters ****************************************/
856/************************************************************************/
Eric Smitha9f7d622008-02-17 19:46:49 +0000857PyObject *
Eric Smithdc13b792008-05-30 18:10:04 +0000858FORMAT_STRING(PyObject *obj,
859 STRINGLIB_CHAR *format_spec,
860 Py_ssize_t format_spec_len)
Eric Smitha9f7d622008-02-17 19:46:49 +0000861{
Eric Smitha9f7d622008-02-17 19:46:49 +0000862 InternalFormatSpec format;
Eric Smithdc13b792008-05-30 18:10:04 +0000863 PyObject *result = NULL;
Eric Smitha9f7d622008-02-17 19:46:49 +0000864
865 /* check for the special case of zero length format spec, make
Eric Smithdc13b792008-05-30 18:10:04 +0000866 it equivalent to str(obj) */
867 if (format_spec_len == 0) {
868 result = STRINGLIB_TOSTR(obj);
Eric Smitha9f7d622008-02-17 19:46:49 +0000869 goto done;
870 }
871
Eric Smitha9f7d622008-02-17 19:46:49 +0000872 /* parse the format_spec */
Eric Smithdc13b792008-05-30 18:10:04 +0000873 if (!parse_internal_render_format_spec(format_spec, format_spec_len,
874 &format, 's'))
Eric Smitha9f7d622008-02-17 19:46:49 +0000875 goto done;
876
877 /* type conversion? */
878 switch (format.type) {
879 case 's':
880 /* no type conversion needed, already a string. do the formatting */
Eric Smithdc13b792008-05-30 18:10:04 +0000881 result = format_string_internal(obj, &format);
Eric Smitha9f7d622008-02-17 19:46:49 +0000882 break;
883 default:
884 /* unknown */
Eric Smithe9fb6862009-02-20 14:02:36 +0000885 unknown_presentation_type(format.type, obj->ob_type->tp_name);
Eric Smitha9f7d622008-02-17 19:46:49 +0000886 goto done;
887 }
888
889done:
Eric Smitha9f7d622008-02-17 19:46:49 +0000890 return result;
891}
Eric Smitha9f7d622008-02-17 19:46:49 +0000892
893#if defined FORMAT_LONG || defined FORMAT_INT
894static PyObject*
Eric Smithdc13b792008-05-30 18:10:04 +0000895format_int_or_long(PyObject* obj,
896 STRINGLIB_CHAR *format_spec,
897 Py_ssize_t format_spec_len,
898 IntOrLongToString tostring)
Eric Smitha9f7d622008-02-17 19:46:49 +0000899{
Eric Smitha9f7d622008-02-17 19:46:49 +0000900 PyObject *result = NULL;
901 PyObject *tmp = NULL;
902 InternalFormatSpec format;
903
Eric Smitha9f7d622008-02-17 19:46:49 +0000904 /* check for the special case of zero length format spec, make
Eric Smithdc13b792008-05-30 18:10:04 +0000905 it equivalent to str(obj) */
906 if (format_spec_len == 0) {
907 result = STRINGLIB_TOSTR(obj);
Eric Smitha9f7d622008-02-17 19:46:49 +0000908 goto done;
909 }
910
911 /* parse the format_spec */
Eric Smithdc13b792008-05-30 18:10:04 +0000912 if (!parse_internal_render_format_spec(format_spec,
913 format_spec_len,
914 &format, 'd'))
Eric Smitha9f7d622008-02-17 19:46:49 +0000915 goto done;
916
917 /* type conversion? */
918 switch (format.type) {
919 case 'b':
920 case 'c':
921 case 'd':
922 case 'o':
923 case 'x':
924 case 'X':
Eric Smithcf537ff2008-05-11 19:52:48 +0000925 case 'n':
Eric Smitha9f7d622008-02-17 19:46:49 +0000926 /* no type conversion needed, already an int (or long). do
927 the formatting */
Eric Smithdc13b792008-05-30 18:10:04 +0000928 result = format_int_or_long_internal(obj, &format, tostring);
Eric Smitha9f7d622008-02-17 19:46:49 +0000929 break;
930
931 case 'e':
932 case 'E':
933 case 'f':
934 case 'F':
935 case 'g':
936 case 'G':
Eric Smitha9f7d622008-02-17 19:46:49 +0000937 case '%':
938 /* convert to float */
Eric Smithdc13b792008-05-30 18:10:04 +0000939 tmp = PyNumber_Float(obj);
Eric Smitha9f7d622008-02-17 19:46:49 +0000940 if (tmp == NULL)
941 goto done;
Eric Smithdc13b792008-05-30 18:10:04 +0000942 result = format_float_internal(obj, &format);
Eric Smitha9f7d622008-02-17 19:46:49 +0000943 break;
944
945 default:
946 /* unknown */
Eric Smithe9fb6862009-02-20 14:02:36 +0000947 unknown_presentation_type(format.type, obj->ob_type->tp_name);
Eric Smitha9f7d622008-02-17 19:46:49 +0000948 goto done;
949 }
950
951done:
952 Py_XDECREF(tmp);
953 return result;
954}
955#endif /* FORMAT_LONG || defined FORMAT_INT */
956
957#ifdef FORMAT_LONG
958/* Need to define long_format as a function that will convert a long
959 to a string. In 3.0, _PyLong_Format has the correct signature. In
960 2.x, we need to fudge a few parameters */
961#if PY_VERSION_HEX >= 0x03000000
962#define long_format _PyLong_Format
963#else
964static PyObject*
965long_format(PyObject* value, int base)
966{
967 /* Convert to base, don't add trailing 'L', and use the new octal
968 format. We already know this is a long object */
969 assert(PyLong_Check(value));
970 /* convert to base, don't add 'L', and use the new octal format */
971 return _PyLong_Format(value, base, 0, 1);
972}
973#endif
974
975PyObject *
Eric Smithdc13b792008-05-30 18:10:04 +0000976FORMAT_LONG(PyObject *obj,
977 STRINGLIB_CHAR *format_spec,
978 Py_ssize_t format_spec_len)
Eric Smitha9f7d622008-02-17 19:46:49 +0000979{
Eric Smithdc13b792008-05-30 18:10:04 +0000980 return format_int_or_long(obj, format_spec, format_spec_len,
981 long_format);
Eric Smitha9f7d622008-02-17 19:46:49 +0000982}
983#endif /* FORMAT_LONG */
984
985#ifdef FORMAT_INT
986/* this is only used for 2.x, not 3.0 */
987static PyObject*
988int_format(PyObject* value, int base)
989{
990 /* Convert to base, and use the new octal format. We already
991 know this is an int object */
992 assert(PyInt_Check(value));
993 return _PyInt_Format((PyIntObject*)value, base, 1);
994}
995
996PyObject *
Eric Smithdc13b792008-05-30 18:10:04 +0000997FORMAT_INT(PyObject *obj,
998 STRINGLIB_CHAR *format_spec,
999 Py_ssize_t format_spec_len)
Eric Smitha9f7d622008-02-17 19:46:49 +00001000{
Eric Smithdc13b792008-05-30 18:10:04 +00001001 return format_int_or_long(obj, format_spec, format_spec_len,
1002 int_format);
Eric Smitha9f7d622008-02-17 19:46:49 +00001003}
1004#endif /* FORMAT_INT */
1005
1006#ifdef FORMAT_FLOAT
1007PyObject *
Eric Smithdc13b792008-05-30 18:10:04 +00001008FORMAT_FLOAT(PyObject *obj,
1009 STRINGLIB_CHAR *format_spec,
1010 Py_ssize_t format_spec_len)
Eric Smitha9f7d622008-02-17 19:46:49 +00001011{
Eric Smitha9f7d622008-02-17 19:46:49 +00001012 PyObject *result = NULL;
1013 InternalFormatSpec format;
1014
Eric Smitha9f7d622008-02-17 19:46:49 +00001015 /* check for the special case of zero length format spec, make
Eric Smithdc13b792008-05-30 18:10:04 +00001016 it equivalent to str(obj) */
1017 if (format_spec_len == 0) {
1018 result = STRINGLIB_TOSTR(obj);
Eric Smitha9f7d622008-02-17 19:46:49 +00001019 goto done;
1020 }
1021
1022 /* parse the format_spec */
Eric Smithdc13b792008-05-30 18:10:04 +00001023 if (!parse_internal_render_format_spec(format_spec,
1024 format_spec_len,
1025 &format, '\0'))
Eric Smitha9f7d622008-02-17 19:46:49 +00001026 goto done;
1027
1028 /* type conversion? */
1029 switch (format.type) {
Eric Smith8113ca62008-03-17 11:01:01 +00001030 case '\0':
1031 /* 'Z' means like 'g', but with at least one decimal. See
1032 PyOS_ascii_formatd */
1033 format.type = 'Z';
1034 /* Deliberate fall through to the next case statement */
Eric Smitha9f7d622008-02-17 19:46:49 +00001035 case 'e':
1036 case 'E':
1037 case 'f':
1038 case 'F':
1039 case 'g':
1040 case 'G':
1041 case 'n':
1042 case '%':
1043 /* no conversion, already a float. do the formatting */
Eric Smithdc13b792008-05-30 18:10:04 +00001044 result = format_float_internal(obj, &format);
Eric Smitha9f7d622008-02-17 19:46:49 +00001045 break;
1046
1047 default:
1048 /* unknown */
Eric Smithe9fb6862009-02-20 14:02:36 +00001049 unknown_presentation_type(format.type, obj->ob_type->tp_name);
Eric Smitha9f7d622008-02-17 19:46:49 +00001050 goto done;
1051 }
1052
1053done:
1054 return result;
1055}
1056#endif /* FORMAT_FLOAT */