blob: 22dd292ce9ea41313ce19d802b6139473cd4ee4b [file] [log] [blame]
/* implements the string, long, and float formatters.  that is,
   string.__format__, etc. */
3
/* Before including this, you must include either:
   stringlib/unicodedefs.h
   stringlib/stringdefs.h

   Also, you should define the names:
   FORMAT_STRING
   FORMAT_LONG
   FORMAT_FLOAT
   to be whatever you want the public names of these functions to
   be.  These are the only non-static functions defined here.
*/
15
/* Compile-time switch for the "(" sign option.  When non-zero, '(' is
   accepted as a sign element, the parser consumes an optional ')'
   right after it, and negative numbers are rendered wrapped in
   parentheses.  It is disabled (0) here. */
#define ALLOW_PARENS_FOR_SIGN 0
17
Eric Smith8c663262007-08-25 02:26:07 +000018/*
19 get_integer consumes 0 or more decimal digit characters from an
20 input string, updates *result with the corresponding positive
21 integer, and returns the number of digits consumed.
22
23 returns -1 on error.
24*/
25static int
26get_integer(STRINGLIB_CHAR **ptr, STRINGLIB_CHAR *end,
27 Py_ssize_t *result)
28{
29 Py_ssize_t accumulator, digitval, oldaccumulator;
30 int numdigits;
31 accumulator = numdigits = 0;
32 for (;;(*ptr)++, numdigits++) {
33 if (*ptr >= end)
34 break;
35 digitval = STRINGLIB_TODECIMAL(**ptr);
36 if (digitval < 0)
37 break;
38 /*
39 This trick was copied from old Unicode format code. It's cute,
40 but would really suck on an old machine with a slow divide
41 implementation. Fortunately, in the normal case we do not
42 expect too many digits.
43 */
44 oldaccumulator = accumulator;
45 accumulator *= 10;
46 if ((accumulator+10)/10 != oldaccumulator+1) {
47 PyErr_Format(PyExc_ValueError,
48 "Too many decimal digits in format string");
49 return -1;
50 }
51 accumulator += digitval;
52 }
53 *result = accumulator;
54 return numdigits;
55}
56
57/************************************************************************/
58/*********** standard format specifier parsing **************************/
59/************************************************************************/
60
61/* returns true if this character is a specifier alignment token */
62Py_LOCAL_INLINE(int)
63is_alignment_token(STRINGLIB_CHAR c)
64{
65 switch (c) {
66 case '<': case '>': case '=': case '^':
67 return 1;
68 default:
69 return 0;
70 }
71}
72
73/* returns true if this character is a sign element */
74Py_LOCAL_INLINE(int)
75is_sign_element(STRINGLIB_CHAR c)
76{
77 switch (c) {
Eric Smithb7f5ba12007-08-29 12:38:45 +000078 case ' ': case '+': case '-':
Eric Smith44300952007-08-29 12:43:12 +000079#if ALLOW_PARENS_FOR_SIGN
Eric Smithb7f5ba12007-08-29 12:38:45 +000080 case '(':
Eric Smith44300952007-08-29 12:43:12 +000081#endif
Eric Smith8c663262007-08-25 02:26:07 +000082 return 1;
83 default:
84 return 0;
85 }
86}
87
88
89typedef struct {
90 STRINGLIB_CHAR fill_char;
91 STRINGLIB_CHAR align;
92 STRINGLIB_CHAR sign;
93 Py_ssize_t width;
94 Py_ssize_t precision;
95 STRINGLIB_CHAR type;
96} InternalFormatSpec;
97
98/*
99 ptr points to the start of the format_spec, end points just past its end.
100 fills in format with the parsed information.
101 returns 1 on success, 0 on failure.
102 if failure, sets the exception
103*/
104static int
105parse_internal_render_format_spec(PyObject *format_spec,
106 InternalFormatSpec *format,
107 char default_type)
108{
109 STRINGLIB_CHAR *ptr = STRINGLIB_STR(format_spec);
110 STRINGLIB_CHAR *end = ptr + STRINGLIB_LEN(format_spec);
111
112 /* end-ptr is used throughout this code to specify the length of
113 the input string */
114
115 Py_ssize_t specified_width;
116
117 format->fill_char = '\0';
118 format->align = '\0';
119 format->sign = '\0';
120 format->width = -1;
121 format->precision = -1;
122 format->type = default_type;
123
124 /* If the second char is an alignment token,
125 then parse the fill char */
126 if (end-ptr >= 2 && is_alignment_token(ptr[1])) {
127 format->align = ptr[1];
128 format->fill_char = ptr[0];
129 ptr += 2;
Eric Smith0cb431c2007-08-28 01:07:27 +0000130 }
131 else if (end-ptr >= 1 && is_alignment_token(ptr[0])) {
Eric Smith8c663262007-08-25 02:26:07 +0000132 format->align = ptr[0];
Christian Heimesc3f30c42008-02-22 16:37:40 +0000133 ++ptr;
Eric Smith8c663262007-08-25 02:26:07 +0000134 }
135
136 /* Parse the various sign options */
137 if (end-ptr >= 1 && is_sign_element(ptr[0])) {
138 format->sign = ptr[0];
Christian Heimesc3f30c42008-02-22 16:37:40 +0000139 ++ptr;
Eric Smithb7f5ba12007-08-29 12:38:45 +0000140#if ALLOW_PARENS_FOR_SIGN
Eric Smith8c663262007-08-25 02:26:07 +0000141 if (end-ptr >= 1 && ptr[0] == ')') {
Christian Heimesc3f30c42008-02-22 16:37:40 +0000142 ++ptr;
Eric Smith8c663262007-08-25 02:26:07 +0000143 }
Eric Smithb7f5ba12007-08-29 12:38:45 +0000144#endif
Eric Smith8c663262007-08-25 02:26:07 +0000145 }
146
147 /* The special case for 0-padding (backwards compat) */
Eric Smith185e30c2007-08-30 22:23:08 +0000148 if (format->fill_char == '\0' && end-ptr >= 1 && ptr[0] == '0') {
Eric Smith8c663262007-08-25 02:26:07 +0000149 format->fill_char = '0';
150 if (format->align == '\0') {
151 format->align = '=';
152 }
Christian Heimesc3f30c42008-02-22 16:37:40 +0000153 ++ptr;
Eric Smith8c663262007-08-25 02:26:07 +0000154 }
155
156 /* XXX add error checking */
157 specified_width = get_integer(&ptr, end, &format->width);
158
159 /* if specified_width is 0, we didn't consume any characters for
160 the width. in that case, reset the width to -1, because
161 get_integer() will have set it to zero */
162 if (specified_width == 0) {
163 format->width = -1;
164 }
165
166 /* Parse field precision */
167 if (end-ptr && ptr[0] == '.') {
Christian Heimesc3f30c42008-02-22 16:37:40 +0000168 ++ptr;
Eric Smith8c663262007-08-25 02:26:07 +0000169
170 /* XXX add error checking */
171 specified_width = get_integer(&ptr, end, &format->precision);
172
173 /* not having a precision after a dot is an error */
174 if (specified_width == 0) {
175 PyErr_Format(PyExc_ValueError,
176 "Format specifier missing precision");
177 return 0;
178 }
179
180 }
181
182 /* Finally, parse the type field */
183
184 if (end-ptr > 1) {
185 /* invalid conversion spec */
186 PyErr_Format(PyExc_ValueError, "Invalid conversion specification");
187 return 0;
188 }
189
190 if (end-ptr == 1) {
191 format->type = ptr[0];
Christian Heimesc3f30c42008-02-22 16:37:40 +0000192 ++ptr;
Eric Smith8c663262007-08-25 02:26:07 +0000193 }
194
195 return 1;
196}
197
Eric Smith8fd3eba2008-02-17 19:48:00 +0000198#if defined FORMAT_FLOAT || defined FORMAT_LONG
Eric Smith8c663262007-08-25 02:26:07 +0000199/************************************************************************/
200/*********** common routines for numeric formatting *********************/
201/************************************************************************/
202
203/* describes the layout for an integer, see the comment in
204 _calc_integer_widths() for details */
205typedef struct {
206 Py_ssize_t n_lpadding;
207 Py_ssize_t n_spadding;
208 Py_ssize_t n_rpadding;
209 char lsign;
210 Py_ssize_t n_lsign;
211 char rsign;
212 Py_ssize_t n_rsign;
213 Py_ssize_t n_total; /* just a convenience, it's derivable from the
214 other fields */
215} NumberFieldWidths;
216
217/* not all fields of format are used. for example, precision is
218 unused. should this take discrete params in order to be more clear
219 about what it does? or is passing a single format parameter easier
220 and more efficient enough to justify a little obfuscation? */
221static void
222calc_number_widths(NumberFieldWidths *r, STRINGLIB_CHAR actual_sign,
223 Py_ssize_t n_digits, const InternalFormatSpec *format)
224{
225 r->n_lpadding = 0;
226 r->n_spadding = 0;
227 r->n_rpadding = 0;
228 r->lsign = '\0';
229 r->n_lsign = 0;
230 r->rsign = '\0';
231 r->n_rsign = 0;
232
233 /* the output will look like:
234 | |
235 | <lpadding> <lsign> <spadding> <digits> <rsign> <rpadding> |
236 | |
237
238 lsign and rsign are computed from format->sign and the actual
239 sign of the number
240
241 digits is already known
242
243 the total width is either given, or computed from the
244 actual digits
245
246 only one of lpadding, spadding, and rpadding can be non-zero,
247 and it's calculated from the width and other fields
248 */
249
250 /* compute the various parts we're going to write */
251 if (format->sign == '+') {
252 /* always put a + or - */
253 r->n_lsign = 1;
254 r->lsign = (actual_sign == '-' ? '-' : '+');
Eric Smith0cb431c2007-08-28 01:07:27 +0000255 }
Eric Smithb7f5ba12007-08-29 12:38:45 +0000256#if ALLOW_PARENS_FOR_SIGN
Eric Smith0cb431c2007-08-28 01:07:27 +0000257 else if (format->sign == '(') {
Eric Smith8c663262007-08-25 02:26:07 +0000258 if (actual_sign == '-') {
259 r->n_lsign = 1;
260 r->lsign = '(';
261 r->n_rsign = 1;
262 r->rsign = ')';
263 }
Eric Smith0cb431c2007-08-28 01:07:27 +0000264 }
Eric Smithb7f5ba12007-08-29 12:38:45 +0000265#endif
Eric Smith0cb431c2007-08-28 01:07:27 +0000266 else if (format->sign == ' ') {
Eric Smith8c663262007-08-25 02:26:07 +0000267 r->n_lsign = 1;
268 r->lsign = (actual_sign == '-' ? '-' : ' ');
Eric Smith0cb431c2007-08-28 01:07:27 +0000269 }
270 else {
Eric Smith8c663262007-08-25 02:26:07 +0000271 /* non specified, or the default (-) */
272 if (actual_sign == '-') {
273 r->n_lsign = 1;
274 r->lsign = '-';
275 }
276 }
277
278 /* now the number of padding characters */
279 if (format->width == -1) {
280 /* no padding at all, nothing to do */
Eric Smith0cb431c2007-08-28 01:07:27 +0000281 }
282 else {
Eric Smith8c663262007-08-25 02:26:07 +0000283 /* see if any padding is needed */
284 if (r->n_lsign + n_digits + r->n_rsign >= format->width) {
285 /* no padding needed, we're already bigger than the
286 requested width */
Eric Smith0cb431c2007-08-28 01:07:27 +0000287 }
288 else {
Eric Smith8c663262007-08-25 02:26:07 +0000289 /* determine which of left, space, or right padding is
290 needed */
Eric Smith8fd3eba2008-02-17 19:48:00 +0000291 Py_ssize_t padding = format->width -
292 (r->n_lsign + n_digits + r->n_rsign);
Eric Smith8c663262007-08-25 02:26:07 +0000293 if (format->align == '<')
294 r->n_rpadding = padding;
295 else if (format->align == '>')
296 r->n_lpadding = padding;
297 else if (format->align == '^') {
298 r->n_lpadding = padding / 2;
299 r->n_rpadding = padding - r->n_lpadding;
Eric Smith0cb431c2007-08-28 01:07:27 +0000300 }
Eric Smith185e30c2007-08-30 22:23:08 +0000301 else if (format->align == '=')
Eric Smith8c663262007-08-25 02:26:07 +0000302 r->n_spadding = padding;
Eric Smith185e30c2007-08-30 22:23:08 +0000303 else
304 r->n_lpadding = padding;
Eric Smith8c663262007-08-25 02:26:07 +0000305 }
306 }
307 r->n_total = r->n_lpadding + r->n_lsign + r->n_spadding +
308 n_digits + r->n_rsign + r->n_rpadding;
309}
310
311/* fill in the non-digit parts of a numbers's string representation,
312 as determined in _calc_integer_widths(). returns the pointer to
313 where the digits go. */
314static STRINGLIB_CHAR *
315fill_number(STRINGLIB_CHAR *p_buf, const NumberFieldWidths *spec,
316 Py_ssize_t n_digits, STRINGLIB_CHAR fill_char)
317{
318 STRINGLIB_CHAR* p_digits;
319
320 if (spec->n_lpadding) {
321 STRINGLIB_FILL(p_buf, fill_char, spec->n_lpadding);
322 p_buf += spec->n_lpadding;
323 }
324 if (spec->n_lsign == 1) {
325 *p_buf++ = spec->lsign;
326 }
327 if (spec->n_spadding) {
328 STRINGLIB_FILL(p_buf, fill_char, spec->n_spadding);
329 p_buf += spec->n_spadding;
330 }
331 p_digits = p_buf;
332 p_buf += n_digits;
333 if (spec->n_rsign == 1) {
334 *p_buf++ = spec->rsign;
335 }
336 if (spec->n_rpadding) {
337 STRINGLIB_FILL(p_buf, fill_char, spec->n_rpadding);
338 p_buf += spec->n_rpadding;
339 }
340 return p_digits;
341}
Eric Smith8fd3eba2008-02-17 19:48:00 +0000342#endif /* FORMAT_FLOAT || FORMAT_LONG */
Eric Smith8c663262007-08-25 02:26:07 +0000343
344/************************************************************************/
345/*********** string formatting ******************************************/
346/************************************************************************/
347
348static PyObject *
349format_string_internal(PyObject *value, const InternalFormatSpec *format)
350{
351 Py_ssize_t width; /* total field width */
352 Py_ssize_t lpad;
353 STRINGLIB_CHAR *dst;
354 STRINGLIB_CHAR *src = STRINGLIB_STR(value);
355 Py_ssize_t len = STRINGLIB_LEN(value);
356 PyObject *result = NULL;
357
358 /* sign is not allowed on strings */
359 if (format->sign != '\0') {
360 PyErr_SetString(PyExc_ValueError,
361 "Sign not allowed in string format specifier");
362 goto done;
363 }
364
365 /* '=' alignment not allowed on strings */
366 if (format->align == '=') {
367 PyErr_SetString(PyExc_ValueError,
368 "'=' alignment not allowed "
369 "in string format specifier");
370 goto done;
371 }
372
373 /* if precision is specified, output no more that format.precision
374 characters */
375 if (format->precision >= 0 && len >= format->precision) {
376 len = format->precision;
377 }
378
379 if (format->width >= 0) {
380 width = format->width;
381
382 /* but use at least len characters */
383 if (len > width) {
384 width = len;
385 }
Eric Smith0cb431c2007-08-28 01:07:27 +0000386 }
387 else {
Eric Smith8c663262007-08-25 02:26:07 +0000388 /* not specified, use all of the chars and no more */
389 width = len;
390 }
391
392 /* allocate the resulting string */
393 result = STRINGLIB_NEW(NULL, width);
394 if (result == NULL)
395 goto done;
396
397 /* now write into that space */
398 dst = STRINGLIB_STR(result);
399
400 /* figure out how much leading space we need, based on the
401 aligning */
402 if (format->align == '>')
403 lpad = width - len;
404 else if (format->align == '^')
405 lpad = (width - len) / 2;
406 else
407 lpad = 0;
408
409 /* if right aligning, increment the destination allow space on the
410 left */
411 memcpy(dst + lpad, src, len * sizeof(STRINGLIB_CHAR));
412
413 /* do any padding */
414 if (width > len) {
415 STRINGLIB_CHAR fill_char = format->fill_char;
416 if (fill_char == '\0') {
417 /* use the default, if not specified */
418 fill_char = ' ';
419 }
420
421 /* pad on left */
422 if (lpad)
423 STRINGLIB_FILL(dst, fill_char, lpad);
424
425 /* pad on right */
426 if (width - len - lpad)
427 STRINGLIB_FILL(dst + len + lpad, fill_char, width - len - lpad);
428 }
429
430done:
431 return result;
432}
433
434
435/************************************************************************/
436/*********** long formatting ********************************************/
437/************************************************************************/
438
Eric Smith8fd3eba2008-02-17 19:48:00 +0000439#if defined FORMAT_LONG || defined FORMAT_INT
440typedef PyObject*
441(*IntOrLongToString)(PyObject *value, int base);
442
Eric Smith8c663262007-08-25 02:26:07 +0000443static PyObject *
Eric Smith8fd3eba2008-02-17 19:48:00 +0000444format_int_or_long_internal(PyObject *value, const InternalFormatSpec *format,
445 IntOrLongToString tostring)
Eric Smith8c663262007-08-25 02:26:07 +0000446{
447 PyObject *result = NULL;
Eric Smith8fd3eba2008-02-17 19:48:00 +0000448 PyObject *tmp = NULL;
449 STRINGLIB_CHAR *pnumeric_chars;
450 STRINGLIB_CHAR numeric_char;
Eric Smith8c663262007-08-25 02:26:07 +0000451 STRINGLIB_CHAR sign = '\0';
452 STRINGLIB_CHAR *p;
453 Py_ssize_t n_digits; /* count of digits need from the computed
454 string */
Eric Smith8fd3eba2008-02-17 19:48:00 +0000455 Py_ssize_t n_leading_chars;
Eric Smith5807c412008-05-11 21:00:57 +0000456 Py_ssize_t n_grouping_chars = 0; /* Count of additional chars to
457 allocate, used for 'n'
458 formatting. */
Eric Smith8c663262007-08-25 02:26:07 +0000459 NumberFieldWidths spec;
460 long x;
461
462 /* no precision allowed on integers */
463 if (format->precision != -1) {
464 PyErr_SetString(PyExc_ValueError,
465 "Precision not allowed in integer format specifier");
466 goto done;
467 }
468
469
470 /* special case for character formatting */
471 if (format->type == 'c') {
472 /* error to specify a sign */
473 if (format->sign != '\0') {
474 PyErr_SetString(PyExc_ValueError,
475 "Sign not allowed with integer"
476 " format specifier 'c'");
477 goto done;
478 }
479
480 /* taken from unicodeobject.c formatchar() */
481 /* Integer input truncated to a character */
Eric Smith8fd3eba2008-02-17 19:48:00 +0000482/* XXX: won't work for int */
Christian Heimes217cfd12007-12-02 14:31:20 +0000483 x = PyLong_AsLong(value);
Eric Smith8c663262007-08-25 02:26:07 +0000484 if (x == -1 && PyErr_Occurred())
485 goto done;
486#ifdef Py_UNICODE_WIDE
487 if (x < 0 || x > 0x10ffff) {
488 PyErr_SetString(PyExc_OverflowError,
489 "%c arg not in range(0x110000) "
490 "(wide Python build)");
491 goto done;
492 }
493#else
494 if (x < 0 || x > 0xffff) {
495 PyErr_SetString(PyExc_OverflowError,
496 "%c arg not in range(0x10000) "
497 "(narrow Python build)");
498 goto done;
499 }
500#endif
Eric Smith8fd3eba2008-02-17 19:48:00 +0000501 numeric_char = (STRINGLIB_CHAR)x;
502 pnumeric_chars = &numeric_char;
503 n_digits = 1;
Eric Smith0cb431c2007-08-28 01:07:27 +0000504 }
505 else {
Eric Smith8c663262007-08-25 02:26:07 +0000506 int base;
Eric Smith8fd3eba2008-02-17 19:48:00 +0000507 int leading_chars_to_skip; /* Number of characters added by
508 PyNumber_ToBase that we want to
509 skip over. */
510
511 /* Compute the base and how many characters will be added by
Eric Smith8c663262007-08-25 02:26:07 +0000512 PyNumber_ToBase */
513 switch (format->type) {
514 case 'b':
515 base = 2;
Eric Smith8fd3eba2008-02-17 19:48:00 +0000516 leading_chars_to_skip = 2; /* 0b */
Eric Smith8c663262007-08-25 02:26:07 +0000517 break;
518 case 'o':
519 base = 8;
Eric Smith8fd3eba2008-02-17 19:48:00 +0000520 leading_chars_to_skip = 2; /* 0o */
Eric Smith8c663262007-08-25 02:26:07 +0000521 break;
522 case 'x':
523 case 'X':
524 base = 16;
Eric Smith8fd3eba2008-02-17 19:48:00 +0000525 leading_chars_to_skip = 2; /* 0x */
Eric Smith8c663262007-08-25 02:26:07 +0000526 break;
527 default: /* shouldn't be needed, but stops a compiler warning */
528 case 'd':
Eric Smith5807c412008-05-11 21:00:57 +0000529 case 'n':
Eric Smith8c663262007-08-25 02:26:07 +0000530 base = 10;
Eric Smith8fd3eba2008-02-17 19:48:00 +0000531 leading_chars_to_skip = 0;
Eric Smith8c663262007-08-25 02:26:07 +0000532 break;
533 }
534
Eric Smith8fd3eba2008-02-17 19:48:00 +0000535 /* Do the hard part, converting to a string in a given base */
536 tmp = tostring(value, base);
537 if (tmp == NULL)
Eric Smith8c663262007-08-25 02:26:07 +0000538 goto done;
539
Eric Smith8fd3eba2008-02-17 19:48:00 +0000540 pnumeric_chars = STRINGLIB_STR(tmp);
541 n_digits = STRINGLIB_LEN(tmp);
Eric Smith8c663262007-08-25 02:26:07 +0000542
Eric Smith8fd3eba2008-02-17 19:48:00 +0000543 /* Remember not to modify what pnumeric_chars points to. it
544 might be interned. Only modify it after we copy it into a
545 newly allocated output buffer. */
Eric Smith8c663262007-08-25 02:26:07 +0000546
Eric Smith8fd3eba2008-02-17 19:48:00 +0000547 /* Is a sign character present in the output? If so, remember it
Eric Smith8c663262007-08-25 02:26:07 +0000548 and skip it */
Eric Smith8fd3eba2008-02-17 19:48:00 +0000549 sign = pnumeric_chars[0];
Eric Smith8c663262007-08-25 02:26:07 +0000550 if (sign == '-') {
Eric Smith8fd3eba2008-02-17 19:48:00 +0000551 ++leading_chars_to_skip;
Eric Smith8c663262007-08-25 02:26:07 +0000552 }
553
Eric Smith8fd3eba2008-02-17 19:48:00 +0000554 /* Skip over the leading chars (0x, 0b, etc.) */
555 n_digits -= leading_chars_to_skip;
556 pnumeric_chars += leading_chars_to_skip;
Eric Smith8c663262007-08-25 02:26:07 +0000557 }
558
Eric Smith8fd3eba2008-02-17 19:48:00 +0000559 /* Calculate the widths of the various leading and trailing parts */
Eric Smith8c663262007-08-25 02:26:07 +0000560 calc_number_widths(&spec, sign, n_digits, format);
561
Eric Smith5807c412008-05-11 21:00:57 +0000562 if (format->type == 'n')
563 /* Compute how many additional chars we need to allocate
564 to hold the thousands grouping. */
565 STRINGLIB_GROUPING(pnumeric_chars, n_digits,
566 pnumeric_chars+n_digits,
567 0, &n_grouping_chars, 0);
568
Eric Smith8fd3eba2008-02-17 19:48:00 +0000569 /* Allocate a new string to hold the result */
Eric Smith5807c412008-05-11 21:00:57 +0000570 result = STRINGLIB_NEW(NULL, spec.n_total + n_grouping_chars);
Eric Smith8fd3eba2008-02-17 19:48:00 +0000571 if (!result)
572 goto done;
573 p = STRINGLIB_STR(result);
Eric Smith8c663262007-08-25 02:26:07 +0000574
Eric Smith8fd3eba2008-02-17 19:48:00 +0000575 /* Fill in the digit parts */
576 n_leading_chars = spec.n_lpadding + spec.n_lsign + spec.n_spadding;
577 memmove(p + n_leading_chars,
578 pnumeric_chars,
579 n_digits * sizeof(STRINGLIB_CHAR));
580
Eric Smith5807c412008-05-11 21:00:57 +0000581 /* If type is 'X', convert to uppercase */
Eric Smith8fd3eba2008-02-17 19:48:00 +0000582 if (format->type == 'X') {
583 Py_ssize_t t;
Christian Heimesc3f30c42008-02-22 16:37:40 +0000584 for (t = 0; t < n_digits; ++t)
Eric Smith8fd3eba2008-02-17 19:48:00 +0000585 p[t + n_leading_chars] = STRINGLIB_TOUPPER(p[t + n_leading_chars]);
Eric Smith8c663262007-08-25 02:26:07 +0000586 }
587
Eric Smith5807c412008-05-11 21:00:57 +0000588 /* Insert the grouping, if any, after the uppercasing of 'X', so we can
589 ensure that grouping chars won't be affeted. */
590 if (n_grouping_chars && format->type == 'n') {
591 /* We know this can't fail, since we've already
592 reserved enough space. */
593 STRINGLIB_CHAR *pstart = p + n_leading_chars;
594 int r = STRINGLIB_GROUPING(pstart, n_digits,
595 pstart + n_digits,
596 spec.n_total+n_grouping_chars-n_leading_chars,
597 NULL, 0);
598 assert(r);
599 }
600
Eric Smith8fd3eba2008-02-17 19:48:00 +0000601 /* Fill in the non-digit parts */
Eric Smith8c663262007-08-25 02:26:07 +0000602 fill_number(p, &spec, n_digits,
603 format->fill_char == '\0' ? ' ' : format->fill_char);
604
Eric Smith8c663262007-08-25 02:26:07 +0000605done:
Eric Smith8fd3eba2008-02-17 19:48:00 +0000606 Py_XDECREF(tmp);
Eric Smith8c663262007-08-25 02:26:07 +0000607 return result;
608}
Eric Smith8fd3eba2008-02-17 19:48:00 +0000609#endif /* defined FORMAT_LONG || defined FORMAT_INT */
Eric Smith8c663262007-08-25 02:26:07 +0000610
611/************************************************************************/
612/*********** float formatting *******************************************/
613/************************************************************************/
614
Eric Smith8fd3eba2008-02-17 19:48:00 +0000615#ifdef FORMAT_FLOAT
616#if STRINGLIB_IS_UNICODE
Eric Smith8c663262007-08-25 02:26:07 +0000617/* taken from unicodeobject.c */
618static Py_ssize_t
619strtounicode(Py_UNICODE *buffer, const char *charbuffer)
620{
621 register Py_ssize_t i;
622 Py_ssize_t len = strlen(charbuffer);
Christian Heimesc3f30c42008-02-22 16:37:40 +0000623 for (i = len - 1; i >= 0; --i)
Eric Smith185e30c2007-08-30 22:23:08 +0000624 buffer[i] = (Py_UNICODE) charbuffer[i];
Eric Smith8c663262007-08-25 02:26:07 +0000625
626 return len;
627}
Eric Smith8fd3eba2008-02-17 19:48:00 +0000628#endif
Eric Smith8c663262007-08-25 02:26:07 +0000629
/* Number of elements in the scratch buffers that format_float_internal()
   formats a float into.  see FORMATBUFLEN in unicodeobject.c */
#define FLOAT_FORMATBUFLEN 120
632
633/* much of this is taken from unicodeobject.c */
Eric Smith8c663262007-08-25 02:26:07 +0000634static PyObject *
Christian Heimesc3f30c42008-02-22 16:37:40 +0000635format_float_internal(PyObject *value,
636 const InternalFormatSpec *format)
Eric Smith8c663262007-08-25 02:26:07 +0000637{
638 /* fmt = '%.' + `prec` + `type` + '%%'
639 worst case length = 2 + 10 (len of INT_MAX) + 1 + 2 = 15 (use 20)*/
640 char fmt[20];
641
642 /* taken from unicodeobject.c */
643 /* Worst case length calc to ensure no buffer overrun:
644
645 'g' formats:
Eric Smith185e30c2007-08-30 22:23:08 +0000646 fmt = %#.<prec>g
647 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
648 for any double rep.)
649 len = 1 + prec + 1 + 2 + 5 = 9 + prec
Eric Smith8c663262007-08-25 02:26:07 +0000650
651 'f' formats:
Eric Smith185e30c2007-08-30 22:23:08 +0000652 buf = '-' + [0-9]*x + '.' + [0-9]*prec (with x < 50)
653 len = 1 + 50 + 1 + prec = 52 + prec
Eric Smith8c663262007-08-25 02:26:07 +0000654
655 If prec=0 the effective precision is 1 (the leading digit is
656 always given), therefore increase the length by one.
657
658 */
659 char charbuf[FLOAT_FORMATBUFLEN];
660 Py_ssize_t n_digits;
661 double x;
662 Py_ssize_t precision = format->precision;
663 PyObject *result = NULL;
664 STRINGLIB_CHAR sign;
665 char* trailing = "";
666 STRINGLIB_CHAR *p;
667 NumberFieldWidths spec;
Christian Heimesc3f30c42008-02-22 16:37:40 +0000668 STRINGLIB_CHAR type = format->type;
Eric Smith8c663262007-08-25 02:26:07 +0000669
670#if STRINGLIB_IS_UNICODE
671 Py_UNICODE unicodebuf[FLOAT_FORMATBUFLEN];
672#endif
673
674 /* first, do the conversion as 8-bit chars, using the platform's
675 snprintf. then, if needed, convert to unicode. */
676
677 /* 'F' is the same as 'f', per the PEP */
678 if (type == 'F')
679 type = 'f';
680
681 x = PyFloat_AsDouble(value);
682
683 if (x == -1.0 && PyErr_Occurred())
Eric Smith185e30c2007-08-30 22:23:08 +0000684 goto done;
Eric Smith8c663262007-08-25 02:26:07 +0000685
686 if (type == '%') {
687 type = 'f';
688 x *= 100;
689 trailing = "%";
690 }
691
692 if (precision < 0)
Eric Smith185e30c2007-08-30 22:23:08 +0000693 precision = 6;
Eric Smith8c663262007-08-25 02:26:07 +0000694 if (type == 'f' && (fabs(x) / 1e25) >= 1e25)
Eric Smith185e30c2007-08-30 22:23:08 +0000695 type = 'g';
Eric Smith8c663262007-08-25 02:26:07 +0000696
697 /* cast "type", because if we're in unicode we need to pass a
698 8-bit char. this is safe, because we've restricted what "type"
699 can be */
Eric Smith8fd3eba2008-02-17 19:48:00 +0000700 PyOS_snprintf(fmt, sizeof(fmt), "%%.%" PY_FORMAT_SIZE_T "d%c", precision,
701 (char)type);
Eric Smith8c663262007-08-25 02:26:07 +0000702
Christian Heimesc3f30c42008-02-22 16:37:40 +0000703 /* do the actual formatting */
704 PyOS_ascii_formatd(charbuf, sizeof(charbuf), fmt, x);
Eric Smith8c663262007-08-25 02:26:07 +0000705
706 /* adding trailing to fmt with PyOS_snprintf doesn't work, not
707 sure why. we'll just concatentate it here, no harm done. we
708 know we can't have a buffer overflow from the fmt size
709 analysis */
710 strcat(charbuf, trailing);
711
712 /* rather than duplicate the code for snprintf for both unicode
713 and 8 bit strings, we just use the 8 bit version and then
714 convert to unicode in a separate code path. that's probably
715 the lesser of 2 evils. */
716#if STRINGLIB_IS_UNICODE
717 n_digits = strtounicode(unicodebuf, charbuf);
718 p = unicodebuf;
719#else
720 /* compute the length. I believe this is done because the return
721 value from snprintf above is unreliable */
722 n_digits = strlen(charbuf);
723 p = charbuf;
724#endif
725
726 /* is a sign character present in the output? if so, remember it
727 and skip it */
728 sign = p[0];
729 if (sign == '-') {
Christian Heimesc3f30c42008-02-22 16:37:40 +0000730 ++p;
731 --n_digits;
Eric Smith8c663262007-08-25 02:26:07 +0000732 }
733
734 calc_number_widths(&spec, sign, n_digits, format);
735
736 /* allocate a string with enough space */
737 result = STRINGLIB_NEW(NULL, spec.n_total);
738 if (result == NULL)
739 goto done;
740
741 /* fill in the non-digit parts */
742 fill_number(STRINGLIB_STR(result), &spec, n_digits,
743 format->fill_char == '\0' ? ' ' : format->fill_char);
744
745 /* fill in the digit parts */
Eric Smith8fd3eba2008-02-17 19:48:00 +0000746 memmove(STRINGLIB_STR(result) +
747 (spec.n_lpadding + spec.n_lsign + spec.n_spadding),
Eric Smith8c663262007-08-25 02:26:07 +0000748 p,
749 n_digits * sizeof(STRINGLIB_CHAR));
750
751done:
752 return result;
753}
Eric Smith8fd3eba2008-02-17 19:48:00 +0000754#endif /* FORMAT_FLOAT */
Eric Smith8c663262007-08-25 02:26:07 +0000755
756/************************************************************************/
757/*********** built in formatters ****************************************/
758/************************************************************************/
Eric Smith8fd3eba2008-02-17 19:48:00 +0000759#ifdef FORMAT_STRING
Eric Smith8c663262007-08-25 02:26:07 +0000760PyObject *
761FORMAT_STRING(PyObject* value, PyObject* args)
762{
763 PyObject *format_spec;
Eric Smith8c663262007-08-25 02:26:07 +0000764 PyObject *result = NULL;
Eric Smith8fd3eba2008-02-17 19:48:00 +0000765#if PY_VERSION_HEX < 0x03000000
766 PyObject *tmp = NULL;
767#endif
Eric Smith8c663262007-08-25 02:26:07 +0000768 InternalFormatSpec format;
769
Eric Smith8fd3eba2008-02-17 19:48:00 +0000770 /* If 2.x, we accept either str or unicode, and try to convert it
771 to the right type. In 3.x, we insist on only unicode */
772#if PY_VERSION_HEX >= 0x03000000
773 if (!PyArg_ParseTuple(args, STRINGLIB_PARSE_CODE ":__format__",
774 &format_spec))
Eric Smith8c663262007-08-25 02:26:07 +0000775 goto done;
Eric Smith8fd3eba2008-02-17 19:48:00 +0000776#else
777 /* If 2.x, convert format_spec to the same type as value */
778 /* This is to allow things like u''.format('') */
779 if (!PyArg_ParseTuple(args, "O:__format__", &format_spec))
780 goto done;
781 if (!(PyString_Check(format_spec) || PyUnicode_Check(format_spec))) {
782 PyErr_Format(PyExc_TypeError, "__format__ arg must be str "
783 "or unicode, not %s", Py_TYPE(format_spec)->tp_name);
784 goto done;
785 }
786 tmp = STRINGLIB_TOSTR(format_spec);
787 if (tmp == NULL)
788 goto done;
789 format_spec = tmp;
790#endif
Eric Smith8c663262007-08-25 02:26:07 +0000791
792 /* check for the special case of zero length format spec, make
793 it equivalent to str(value) */
794 if (STRINGLIB_LEN(format_spec) == 0) {
795 result = STRINGLIB_TOSTR(value);
796 goto done;
797 }
798
Eric Smith8fd3eba2008-02-17 19:48:00 +0000799
Eric Smith8c663262007-08-25 02:26:07 +0000800 /* parse the format_spec */
801 if (!parse_internal_render_format_spec(format_spec, &format, 's'))
802 goto done;
803
804 /* type conversion? */
805 switch (format.type) {
806 case 's':
807 /* no type conversion needed, already a string. do the formatting */
808 result = format_string_internal(value, &format);
809 break;
Eric Smith8c663262007-08-25 02:26:07 +0000810 default:
811 /* unknown */
Martin v. Löwis5a6f4582008-04-07 03:22:07 +0000812 #if STRINGLIB_IS_UNICODE
813 /* If STRINGLIB_CHAR is Py_UNICODE, %c might be out-of-range,
814 hence the two cases. If it is char, gcc complains that the
815 condition below is always true, hence the ifdef. */
816 if (format.type > 32 && format.type <128)
817 #endif
818 PyErr_Format(PyExc_ValueError, "Unknown conversion type %c",
819 (char)format.type);
820 #if STRINGLIB_IS_UNICODE
821 else
822 PyErr_Format(PyExc_ValueError, "Unknown conversion type '\\x%x'",
823 (unsigned int)format.type);
824 #endif
Eric Smith8c663262007-08-25 02:26:07 +0000825 goto done;
826 }
827
828done:
Eric Smith8fd3eba2008-02-17 19:48:00 +0000829#if PY_VERSION_HEX < 0x03000000
830 Py_XDECREF(tmp);
831#endif
Eric Smith8c663262007-08-25 02:26:07 +0000832 return result;
833}
Eric Smith8fd3eba2008-02-17 19:48:00 +0000834#endif /* FORMAT_STRING */
Eric Smith8c663262007-08-25 02:26:07 +0000835
Eric Smith8fd3eba2008-02-17 19:48:00 +0000836#if defined FORMAT_LONG || defined FORMAT_INT
837static PyObject*
838format_int_or_long(PyObject* value, PyObject* args, IntOrLongToString tostring)
Eric Smith8c663262007-08-25 02:26:07 +0000839{
840 PyObject *format_spec;
841 PyObject *result = NULL;
842 PyObject *tmp = NULL;
843 InternalFormatSpec format;
844
Eric Smith8fd3eba2008-02-17 19:48:00 +0000845 if (!PyArg_ParseTuple(args, STRINGLIB_PARSE_CODE ":__format__",
846 &format_spec))
Eric Smith8c663262007-08-25 02:26:07 +0000847 goto done;
Eric Smith8c663262007-08-25 02:26:07 +0000848
849 /* check for the special case of zero length format spec, make
850 it equivalent to str(value) */
851 if (STRINGLIB_LEN(format_spec) == 0) {
852 result = STRINGLIB_TOSTR(value);
853 goto done;
854 }
855
856 /* parse the format_spec */
857 if (!parse_internal_render_format_spec(format_spec, &format, 'd'))
858 goto done;
859
860 /* type conversion? */
861 switch (format.type) {
Eric Smith8c663262007-08-25 02:26:07 +0000862 case 'b':
863 case 'c':
864 case 'd':
865 case 'o':
866 case 'x':
867 case 'X':
Eric Smith5807c412008-05-11 21:00:57 +0000868 case 'n':
Eric Smith8fd3eba2008-02-17 19:48:00 +0000869 /* no type conversion needed, already an int (or long). do
870 the formatting */
871 result = format_int_or_long_internal(value, &format, tostring);
Eric Smith8c663262007-08-25 02:26:07 +0000872 break;
873
Eric Smithfa767ef2008-01-28 10:59:27 +0000874 case 'e':
875 case 'E':
876 case 'f':
877 case 'F':
878 case 'g':
879 case 'G':
Eric Smithfa767ef2008-01-28 10:59:27 +0000880 case '%':
881 /* convert to float */
882 tmp = PyNumber_Float(value);
883 if (tmp == NULL)
884 goto done;
885 result = format_float_internal(value, &format);
886 break;
887
Eric Smith8c663262007-08-25 02:26:07 +0000888 default:
889 /* unknown */
890 PyErr_Format(PyExc_ValueError, "Unknown conversion type %c",
891 format.type);
892 goto done;
893 }
894
895done:
896 Py_XDECREF(tmp);
897 return result;
898}
Eric Smith8fd3eba2008-02-17 19:48:00 +0000899#endif /* FORMAT_LONG || defined FORMAT_INT */
Eric Smith8c663262007-08-25 02:26:07 +0000900
Eric Smith8fd3eba2008-02-17 19:48:00 +0000901#ifdef FORMAT_LONG
902/* Need to define long_format as a function that will convert a long
903 to a string. In 3.0, _PyLong_Format has the correct signature. In
904 2.x, we need to fudge a few parameters */
905#if PY_VERSION_HEX >= 0x03000000
906#define long_format _PyLong_Format
907#else
908static PyObject*
909long_format(PyObject* value, int base)
910{
911 /* Convert to base, don't add trailing 'L', and use the new octal
912 format. We already know this is a long object */
913 assert(PyLong_Check(value));
914 /* convert to base, don't add 'L', and use the new octal format */
915 return _PyLong_Format(value, base, 0, 1);
916}
917#endif
918
919PyObject *
920FORMAT_LONG(PyObject* value, PyObject* args)
921{
922 return format_int_or_long(value, args, long_format);
923}
924#endif /* FORMAT_LONG */
925
926#ifdef FORMAT_INT
927/* this is only used for 2.x, not 3.0 */
928static PyObject*
929int_format(PyObject* value, int base)
930{
931 /* Convert to base, and use the new octal format. We already
932 know this is an int object */
933 assert(PyInt_Check(value));
934 return _PyInt_Format((PyIntObject*)value, base, 1);
935}
936
937PyObject *
938FORMAT_INT(PyObject* value, PyObject* args)
939{
940 return format_int_or_long(value, args, int_format);
941}
942#endif /* FORMAT_INT */
943
944#ifdef FORMAT_FLOAT
Eric Smith8c663262007-08-25 02:26:07 +0000945PyObject *
946FORMAT_FLOAT(PyObject *value, PyObject *args)
947{
948 PyObject *format_spec;
949 PyObject *result = NULL;
Eric Smith8c663262007-08-25 02:26:07 +0000950 InternalFormatSpec format;
951
Eric Smith37f10382007-09-01 10:56:01 +0000952 if (!PyArg_ParseTuple(args, STRINGLIB_PARSE_CODE ":__format__", &format_spec))
Eric Smith8c663262007-08-25 02:26:07 +0000953 goto done;
Eric Smith8c663262007-08-25 02:26:07 +0000954
955 /* check for the special case of zero length format spec, make
956 it equivalent to str(value) */
957 if (STRINGLIB_LEN(format_spec) == 0) {
958 result = STRINGLIB_TOSTR(value);
959 goto done;
960 }
961
962 /* parse the format_spec */
Christian Heimesb186d002008-03-18 15:15:01 +0000963 if (!parse_internal_render_format_spec(format_spec, &format, '\0'))
Eric Smith8c663262007-08-25 02:26:07 +0000964 goto done;
965
966 /* type conversion? */
967 switch (format.type) {
Christian Heimesb186d002008-03-18 15:15:01 +0000968 case '\0':
969 /* 'Z' means like 'g', but with at least one decimal. See
970 PyOS_ascii_formatd */
971 format.type = 'Z';
972 /* Deliberate fall through to the next case statement */
Eric Smith8c663262007-08-25 02:26:07 +0000973 case 'e':
974 case 'E':
975 case 'f':
976 case 'F':
977 case 'g':
978 case 'G':
979 case 'n':
980 case '%':
981 /* no conversion, already a float. do the formatting */
982 result = format_float_internal(value, &format);
983 break;
984
985 default:
986 /* unknown */
987 PyErr_Format(PyExc_ValueError, "Unknown conversion type %c",
988 format.type);
989 goto done;
990 }
991
992done:
Eric Smith8c663262007-08-25 02:26:07 +0000993 return result;
994}
Eric Smith8fd3eba2008-02-17 19:48:00 +0000995#endif /* FORMAT_FLOAT */