blob: 1f3c535b4ae759e87dcb0641fd17fbae62a27d30 [file] [log] [blame]
/* Implements the string, long, and float formatters; that is,
   string.__format__, etc. */
3
Eric Smith0923d1d2009-04-16 20:16:10 +00004#include <locale.h>
5
/* Before including this, you must include either:
   stringlib/unicodedefs.h
   stringlib/stringdefs.h

   Also, you should define the names:
   FORMAT_STRING
   FORMAT_LONG
   FORMAT_FLOAT
   FORMAT_COMPLEX
   to be whatever you want the public names of these functions to
   be.  These are the only non-static functions defined here.
*/
18
Eric Smith5e5c0db2009-02-20 14:25:03 +000019/* Raises an exception about an unknown presentation type for this
20 * type. */
21
22static void
23unknown_presentation_type(STRINGLIB_CHAR presentation_type,
24 const char* type_name)
25{
26#if STRINGLIB_IS_UNICODE
27 /* If STRINGLIB_CHAR is Py_UNICODE, %c might be out-of-range,
28 hence the two cases. If it is char, gcc complains that the
29 condition below is always true, hence the ifdef. */
30 if (presentation_type > 32 && presentation_type < 128)
31#endif
32 PyErr_Format(PyExc_ValueError,
33 "Unknown format code '%c' "
34 "for object of type '%.200s'",
35 presentation_type,
36 type_name);
37#if STRINGLIB_IS_UNICODE
38 else
39 PyErr_Format(PyExc_ValueError,
40 "Unknown format code '\\x%x' "
41 "for object of type '%.200s'",
42 (unsigned int)presentation_type,
43 type_name);
44#endif
45}
46
Eric Smith8c663262007-08-25 02:26:07 +000047/*
48 get_integer consumes 0 or more decimal digit characters from an
49 input string, updates *result with the corresponding positive
50 integer, and returns the number of digits consumed.
51
52 returns -1 on error.
53*/
54static int
55get_integer(STRINGLIB_CHAR **ptr, STRINGLIB_CHAR *end,
56 Py_ssize_t *result)
57{
58 Py_ssize_t accumulator, digitval, oldaccumulator;
59 int numdigits;
60 accumulator = numdigits = 0;
61 for (;;(*ptr)++, numdigits++) {
62 if (*ptr >= end)
63 break;
64 digitval = STRINGLIB_TODECIMAL(**ptr);
65 if (digitval < 0)
66 break;
67 /*
68 This trick was copied from old Unicode format code. It's cute,
69 but would really suck on an old machine with a slow divide
70 implementation. Fortunately, in the normal case we do not
71 expect too many digits.
72 */
73 oldaccumulator = accumulator;
74 accumulator *= 10;
75 if ((accumulator+10)/10 != oldaccumulator+1) {
76 PyErr_Format(PyExc_ValueError,
77 "Too many decimal digits in format string");
78 return -1;
79 }
80 accumulator += digitval;
81 }
82 *result = accumulator;
83 return numdigits;
84}
85
86/************************************************************************/
87/*********** standard format specifier parsing **************************/
88/************************************************************************/
89
90/* returns true if this character is a specifier alignment token */
91Py_LOCAL_INLINE(int)
92is_alignment_token(STRINGLIB_CHAR c)
93{
94 switch (c) {
95 case '<': case '>': case '=': case '^':
96 return 1;
97 default:
98 return 0;
99 }
100}
101
102/* returns true if this character is a sign element */
103Py_LOCAL_INLINE(int)
104is_sign_element(STRINGLIB_CHAR c)
105{
106 switch (c) {
Eric Smithb7f5ba12007-08-29 12:38:45 +0000107 case ' ': case '+': case '-':
Eric Smith8c663262007-08-25 02:26:07 +0000108 return 1;
109 default:
110 return 0;
111 }
112}
113
114
115typedef struct {
116 STRINGLIB_CHAR fill_char;
117 STRINGLIB_CHAR align;
Eric Smithb1ebcc62008-07-15 13:02:41 +0000118 int alternate;
Eric Smith8c663262007-08-25 02:26:07 +0000119 STRINGLIB_CHAR sign;
120 Py_ssize_t width;
Eric Smitha3b1ac82009-04-03 14:45:06 +0000121 int thousands_separators;
Eric Smith8c663262007-08-25 02:26:07 +0000122 Py_ssize_t precision;
123 STRINGLIB_CHAR type;
124} InternalFormatSpec;
125
126/*
127 ptr points to the start of the format_spec, end points just past its end.
128 fills in format with the parsed information.
129 returns 1 on success, 0 on failure.
130 if failure, sets the exception
131*/
132static int
Eric Smith4a7d76d2008-05-30 18:10:19 +0000133parse_internal_render_format_spec(STRINGLIB_CHAR *format_spec,
Eric Smithf8c8b6d2009-04-03 11:19:31 +0000134 Py_ssize_t format_spec_len,
Eric Smith8c663262007-08-25 02:26:07 +0000135 InternalFormatSpec *format,
136 char default_type)
137{
Eric Smith4a7d76d2008-05-30 18:10:19 +0000138 STRINGLIB_CHAR *ptr = format_spec;
139 STRINGLIB_CHAR *end = format_spec + format_spec_len;
Eric Smith8c663262007-08-25 02:26:07 +0000140
141 /* end-ptr is used throughout this code to specify the length of
142 the input string */
143
Eric Smith0923d1d2009-04-16 20:16:10 +0000144 Py_ssize_t consumed;
Eric Smith8c663262007-08-25 02:26:07 +0000145
146 format->fill_char = '\0';
147 format->align = '\0';
Eric Smithb1ebcc62008-07-15 13:02:41 +0000148 format->alternate = 0;
Eric Smith8c663262007-08-25 02:26:07 +0000149 format->sign = '\0';
150 format->width = -1;
Eric Smitha3b1ac82009-04-03 14:45:06 +0000151 format->thousands_separators = 0;
Eric Smith8c663262007-08-25 02:26:07 +0000152 format->precision = -1;
153 format->type = default_type;
154
155 /* If the second char is an alignment token,
156 then parse the fill char */
157 if (end-ptr >= 2 && is_alignment_token(ptr[1])) {
158 format->align = ptr[1];
159 format->fill_char = ptr[0];
160 ptr += 2;
Eric Smith0cb431c2007-08-28 01:07:27 +0000161 }
162 else if (end-ptr >= 1 && is_alignment_token(ptr[0])) {
Eric Smith8c663262007-08-25 02:26:07 +0000163 format->align = ptr[0];
Christian Heimesc3f30c42008-02-22 16:37:40 +0000164 ++ptr;
Eric Smith8c663262007-08-25 02:26:07 +0000165 }
166
167 /* Parse the various sign options */
168 if (end-ptr >= 1 && is_sign_element(ptr[0])) {
169 format->sign = ptr[0];
Christian Heimesc3f30c42008-02-22 16:37:40 +0000170 ++ptr;
Eric Smith8c663262007-08-25 02:26:07 +0000171 }
172
Eric Smithd68af8f2008-07-16 00:15:35 +0000173 /* If the next character is #, we're in alternate mode. This only
174 applies to integers. */
175 if (end-ptr >= 1 && ptr[0] == '#') {
Eric Smithf8c8b6d2009-04-03 11:19:31 +0000176 format->alternate = 1;
177 ++ptr;
Eric Smithd68af8f2008-07-16 00:15:35 +0000178 }
179
Eric Smith8c663262007-08-25 02:26:07 +0000180 /* The special case for 0-padding (backwards compat) */
Eric Smith185e30c2007-08-30 22:23:08 +0000181 if (format->fill_char == '\0' && end-ptr >= 1 && ptr[0] == '0') {
Eric Smith8c663262007-08-25 02:26:07 +0000182 format->fill_char = '0';
183 if (format->align == '\0') {
184 format->align = '=';
185 }
Christian Heimesc3f30c42008-02-22 16:37:40 +0000186 ++ptr;
Eric Smith8c663262007-08-25 02:26:07 +0000187 }
188
Eric Smith0923d1d2009-04-16 20:16:10 +0000189 consumed = get_integer(&ptr, end, &format->width);
190 if (consumed == -1)
191 /* Overflow error. Exception already set. */
192 return 0;
Eric Smith8c663262007-08-25 02:26:07 +0000193
Eric Smith0923d1d2009-04-16 20:16:10 +0000194 /* If consumed is 0, we didn't consume any characters for the
195 width. In that case, reset the width to -1, because
196 get_integer() will have set it to zero. -1 is how we record
197 that the width wasn't specified. */
198 if (consumed == 0)
Eric Smith8c663262007-08-25 02:26:07 +0000199 format->width = -1;
Eric Smith8c663262007-08-25 02:26:07 +0000200
Eric Smitha3b1ac82009-04-03 14:45:06 +0000201 /* Comma signifies add thousands separators */
202 if (end-ptr && ptr[0] == ',') {
203 format->thousands_separators = 1;
204 ++ptr;
205 }
206
Eric Smith8c663262007-08-25 02:26:07 +0000207 /* Parse field precision */
208 if (end-ptr && ptr[0] == '.') {
Christian Heimesc3f30c42008-02-22 16:37:40 +0000209 ++ptr;
Eric Smith8c663262007-08-25 02:26:07 +0000210
Eric Smith0923d1d2009-04-16 20:16:10 +0000211 consumed = get_integer(&ptr, end, &format->precision);
212 if (consumed == -1)
213 /* Overflow error. Exception already set. */
214 return 0;
Eric Smith8c663262007-08-25 02:26:07 +0000215
Eric Smith0923d1d2009-04-16 20:16:10 +0000216 /* Not having a precision after a dot is an error. */
217 if (consumed == 0) {
Eric Smith8c663262007-08-25 02:26:07 +0000218 PyErr_Format(PyExc_ValueError,
219 "Format specifier missing precision");
220 return 0;
221 }
222
223 }
224
Eric Smith0923d1d2009-04-16 20:16:10 +0000225 /* Finally, parse the type field. */
Eric Smith8c663262007-08-25 02:26:07 +0000226
227 if (end-ptr > 1) {
Eric Smith0923d1d2009-04-16 20:16:10 +0000228 /* More than one char remain, invalid conversion spec. */
Eric Smith8c663262007-08-25 02:26:07 +0000229 PyErr_Format(PyExc_ValueError, "Invalid conversion specification");
230 return 0;
231 }
232
233 if (end-ptr == 1) {
234 format->type = ptr[0];
Christian Heimesc3f30c42008-02-22 16:37:40 +0000235 ++ptr;
Eric Smith8c663262007-08-25 02:26:07 +0000236 }
237
Eric Smith0923d1d2009-04-16 20:16:10 +0000238 /* Do as much validating as we can, just by looking at the format
239 specifier. Do not take into account what type of formatting
240 we're doing (int, float, string). */
241
242 if (format->thousands_separators) {
243 switch (format->type) {
244 case 'd':
245 case 'e':
246 case 'f':
247 case 'g':
248 case 'E':
249 case 'G':
250 case '%':
251 case 'F':
Eric Smith937491d2009-04-22 17:04:27 +0000252 case '\0':
Eric Smith0923d1d2009-04-16 20:16:10 +0000253 /* These are allowed. See PEP 378.*/
254 break;
255 default:
256 PyErr_Format(PyExc_ValueError,
257 "Cannot specify ',' with '%c'.", format->type);
258 return 0;
259 }
Eric Smitha3b1ac82009-04-03 14:45:06 +0000260 }
261
Eric Smith8c663262007-08-25 02:26:07 +0000262 return 1;
263}
264
Eric Smith58a42242009-04-30 01:00:33 +0000265/* Calculate the padding needed. */
266static void
267calc_padding(Py_ssize_t nchars, Py_ssize_t width, STRINGLIB_CHAR align,
268 Py_ssize_t *n_lpadding, Py_ssize_t *n_rpadding,
269 Py_ssize_t *n_total)
270{
271 if (width >= 0) {
272 if (nchars > width)
273 *n_total = nchars;
274 else
275 *n_total = width;
276 }
277 else {
278 /* not specified, use all of the chars and no more */
279 *n_total = nchars;
280 }
281
282 /* figure out how much leading space we need, based on the
283 aligning */
284 if (align == '>')
285 *n_lpadding = *n_total - nchars;
286 else if (align == '^')
287 *n_lpadding = (*n_total - nchars) / 2;
288 else
289 *n_lpadding = 0;
290
291 *n_rpadding = *n_total - nchars - *n_lpadding;
292}
293
294/* Do the padding, and return a pointer to where the caller-supplied
295 content goes. */
296static STRINGLIB_CHAR *
297fill_padding(STRINGLIB_CHAR *p, Py_ssize_t nchars, STRINGLIB_CHAR fill_char,
298 Py_ssize_t n_lpadding, Py_ssize_t n_rpadding)
299{
300 /* Pad on left. */
301 if (n_lpadding)
302 STRINGLIB_FILL(p, fill_char, n_lpadding);
303
304 /* Pad on right. */
305 if (n_rpadding)
306 STRINGLIB_FILL(p + nchars + n_lpadding, fill_char, n_rpadding);
307
308 /* Pointer to the user content. */
309 return p + n_lpadding;
310}
311
312#if defined FORMAT_FLOAT || defined FORMAT_LONG || defined FORMAT_COMPLEX
Eric Smith8c663262007-08-25 02:26:07 +0000313/************************************************************************/
314/*********** common routines for numeric formatting *********************/
315/************************************************************************/
316
/* Locale type codes, passed as the `type` argument of
   get_locale_info(). */
#define LT_CURRENT_LOCALE 0     /* use the values from localeconv() */
#define LT_DEFAULT_LOCALE 1     /* hard-coded "." / "," / groups of 3 */
#define LT_NO_LOCALE 2          /* "." decimal point, no grouping */
321
/* The locale-dependent pieces needed to format integers and the part
   of floats up to and including the decimal point.  Locales only
   supply 8-bit chars, not unicode, hence plain char strings. */
typedef struct {
    char *decimal_point;    /* text written for the decimal point */
    char *thousands_sep;    /* text written between digit groups */
    char *grouping;         /* group-size description, handed to
                               STRINGLIB_GROUPING */
} LocaleInfo;
330
Eric Smith8c663262007-08-25 02:26:07 +0000331/* describes the layout for an integer, see the comment in
Eric Smithd68af8f2008-07-16 00:15:35 +0000332 calc_number_widths() for details */
Eric Smith8c663262007-08-25 02:26:07 +0000333typedef struct {
334 Py_ssize_t n_lpadding;
Eric Smithd68af8f2008-07-16 00:15:35 +0000335 Py_ssize_t n_prefix;
Eric Smith8c663262007-08-25 02:26:07 +0000336 Py_ssize_t n_spadding;
337 Py_ssize_t n_rpadding;
Eric Smith0923d1d2009-04-16 20:16:10 +0000338 char sign;
339 Py_ssize_t n_sign; /* number of digits needed for sign (0/1) */
340 Py_ssize_t n_grouped_digits; /* Space taken up by the digits, including
341 any grouping chars. */
342 Py_ssize_t n_decimal; /* 0 if only an integer */
343 Py_ssize_t n_remainder; /* Digits in decimal and/or exponent part,
344 excluding the decimal itself, if
345 present. */
346
347 /* These 2 are not the widths of fields, but are needed by
348 STRINGLIB_GROUPING. */
349 Py_ssize_t n_digits; /* The number of digits before a decimal
350 or exponent. */
351 Py_ssize_t n_min_width; /* The min_width we used when we computed
352 the n_grouped_digits width. */
Eric Smith8c663262007-08-25 02:26:07 +0000353} NumberFieldWidths;
354
Eric Smith58a42242009-04-30 01:00:33 +0000355
Eric Smith0923d1d2009-04-16 20:16:10 +0000356/* Given a number of the form:
357 digits[remainder]
358 where ptr points to the start and end points to the end, find where
359 the integer part ends. This could be a decimal, an exponent, both,
360 or neither.
361 If a decimal point is present, set *has_decimal and increment
362 remainder beyond it.
363 Results are undefined (but shouldn't crash) for improperly
364 formatted strings.
365*/
366static void
367parse_number(STRINGLIB_CHAR *ptr, Py_ssize_t len,
368 Py_ssize_t *n_remainder, int *has_decimal)
369{
370 STRINGLIB_CHAR *end = ptr + len;
371 STRINGLIB_CHAR *remainder;
372
373 while (ptr<end && isdigit(*ptr))
374 ++ptr;
375 remainder = ptr;
376
377 /* Does remainder start with a decimal point? */
378 *has_decimal = ptr<end && *remainder == '.';
379
380 /* Skip the decimal point. */
381 if (*has_decimal)
382 remainder++;
383
384 *n_remainder = end - remainder;
385}
386
Eric Smith8c663262007-08-25 02:26:07 +0000387/* not all fields of format are used. for example, precision is
388 unused. should this take discrete params in order to be more clear
389 about what it does? or is passing a single format parameter easier
390 and more efficient enough to justify a little obfuscation? */
Eric Smith0923d1d2009-04-16 20:16:10 +0000391static Py_ssize_t
392calc_number_widths(NumberFieldWidths *spec, Py_ssize_t n_prefix,
393 STRINGLIB_CHAR sign_char, STRINGLIB_CHAR *number,
394 Py_ssize_t n_number, Py_ssize_t n_remainder,
395 int has_decimal, const LocaleInfo *locale,
Eric Smithf8c8b6d2009-04-03 11:19:31 +0000396 const InternalFormatSpec *format)
Eric Smith8c663262007-08-25 02:26:07 +0000397{
Eric Smith0923d1d2009-04-16 20:16:10 +0000398 Py_ssize_t n_non_digit_non_padding;
399 Py_ssize_t n_padding;
400
401 spec->n_digits = n_number - n_remainder - (has_decimal?1:0);
Eric Smith05212a12008-07-16 19:41:14 +0000402 spec->n_lpadding = 0;
Eric Smith0923d1d2009-04-16 20:16:10 +0000403 spec->n_prefix = n_prefix;
404 spec->n_decimal = has_decimal ? strlen(locale->decimal_point) : 0;
405 spec->n_remainder = n_remainder;
Eric Smith05212a12008-07-16 19:41:14 +0000406 spec->n_spadding = 0;
407 spec->n_rpadding = 0;
Eric Smith0923d1d2009-04-16 20:16:10 +0000408 spec->sign = '\0';
409 spec->n_sign = 0;
Eric Smith8c663262007-08-25 02:26:07 +0000410
411 /* the output will look like:
Eric Smith0923d1d2009-04-16 20:16:10 +0000412 | |
413 | <lpadding> <sign> <prefix> <spadding> <grouped_digits> <decimal> <remainder> <rpadding> |
414 | |
Eric Smith8c663262007-08-25 02:26:07 +0000415
Eric Smith0923d1d2009-04-16 20:16:10 +0000416 sign is computed from format->sign and the actual
Eric Smith8c663262007-08-25 02:26:07 +0000417 sign of the number
418
Eric Smithb1ebcc62008-07-15 13:02:41 +0000419 prefix is given (it's for the '0x' prefix)
420
Eric Smith8c663262007-08-25 02:26:07 +0000421 digits is already known
422
423 the total width is either given, or computed from the
424 actual digits
425
426 only one of lpadding, spadding, and rpadding can be non-zero,
427 and it's calculated from the width and other fields
428 */
429
430 /* compute the various parts we're going to write */
Eric Smith0923d1d2009-04-16 20:16:10 +0000431 switch (format->sign) {
432 case '+':
Eric Smith8c663262007-08-25 02:26:07 +0000433 /* always put a + or - */
Eric Smith0923d1d2009-04-16 20:16:10 +0000434 spec->n_sign = 1;
435 spec->sign = (sign_char == '-' ? '-' : '+');
436 break;
437 case ' ':
438 spec->n_sign = 1;
439 spec->sign = (sign_char == '-' ? '-' : ' ');
440 break;
441 default:
442 /* Not specified, or the default (-) */
443 if (sign_char == '-') {
444 spec->n_sign = 1;
445 spec->sign = '-';
Eric Smith8c663262007-08-25 02:26:07 +0000446 }
447 }
448
Eric Smith0923d1d2009-04-16 20:16:10 +0000449 /* The number of chars used for non-digits and non-padding. */
450 n_non_digit_non_padding = spec->n_sign + spec->n_prefix + spec->n_decimal +
451 spec->n_remainder;
Eric Smithd68af8f2008-07-16 00:15:35 +0000452
Eric Smith0923d1d2009-04-16 20:16:10 +0000453 /* min_width can go negative, that's okay. format->width == -1 means
454 we don't care. */
455 if (format->fill_char == '0')
456 spec->n_min_width = format->width - n_non_digit_non_padding;
457 else
458 spec->n_min_width = 0;
459
460 if (spec->n_digits == 0)
461 /* This case only occurs when using 'c' formatting, we need
462 to special case it because the grouping code always wants
463 to have at least one character. */
464 spec->n_grouped_digits = 0;
465 else
466 spec->n_grouped_digits = STRINGLIB_GROUPING(NULL, 0, NULL,
467 spec->n_digits,
468 spec->n_min_width,
469 locale->grouping,
470 locale->thousands_sep);
471
472 /* Given the desired width and the total of digit and non-digit
473 space we consume, see if we need any padding. format->width can
474 be negative (meaning no padding), but this code still works in
475 that case. */
476 n_padding = format->width -
477 (n_non_digit_non_padding + spec->n_grouped_digits);
478 if (n_padding > 0) {
479 /* Some padding is needed. Determine if it's left, space, or right. */
480 switch (format->align) {
481 case '<':
482 spec->n_rpadding = n_padding;
483 break;
484 case '^':
485 spec->n_lpadding = n_padding / 2;
486 spec->n_rpadding = n_padding - spec->n_lpadding;
487 break;
488 case '=':
489 spec->n_spadding = n_padding;
490 break;
491 default:
492 /* Handles '>', plus catch-all just in case. */
493 spec->n_lpadding = n_padding;
494 break;
Eric Smith8c663262007-08-25 02:26:07 +0000495 }
496 }
Eric Smith0923d1d2009-04-16 20:16:10 +0000497 return spec->n_lpadding + spec->n_sign + spec->n_prefix +
498 spec->n_spadding + spec->n_grouped_digits + spec->n_decimal +
499 spec->n_remainder + spec->n_rpadding;
Eric Smith8c663262007-08-25 02:26:07 +0000500}
501
Eric Smith0923d1d2009-04-16 20:16:10 +0000502/* Fill in the digit parts of a numbers's string representation,
503 as determined in calc_number_widths().
504 No error checking, since we know the buffer is the correct size. */
505static void
506fill_number(STRINGLIB_CHAR *buf, const NumberFieldWidths *spec,
507 STRINGLIB_CHAR *digits, Py_ssize_t n_digits,
508 STRINGLIB_CHAR *prefix, STRINGLIB_CHAR fill_char,
509 LocaleInfo *locale, int toupper)
Eric Smith8c663262007-08-25 02:26:07 +0000510{
Eric Smith0923d1d2009-04-16 20:16:10 +0000511 /* Used to keep track of digits, decimal, and remainder. */
512 STRINGLIB_CHAR *p = digits;
513
514#ifndef NDEBUG
515 Py_ssize_t r;
516#endif
Eric Smith8c663262007-08-25 02:26:07 +0000517
518 if (spec->n_lpadding) {
Eric Smith0923d1d2009-04-16 20:16:10 +0000519 STRINGLIB_FILL(buf, fill_char, spec->n_lpadding);
520 buf += spec->n_lpadding;
Eric Smith8c663262007-08-25 02:26:07 +0000521 }
Eric Smith0923d1d2009-04-16 20:16:10 +0000522 if (spec->n_sign == 1) {
523 *buf++ = spec->sign;
Eric Smith8c663262007-08-25 02:26:07 +0000524 }
Eric Smithd68af8f2008-07-16 00:15:35 +0000525 if (spec->n_prefix) {
Eric Smith0923d1d2009-04-16 20:16:10 +0000526 memmove(buf,
Eric Smithf8c8b6d2009-04-03 11:19:31 +0000527 prefix,
528 spec->n_prefix * sizeof(STRINGLIB_CHAR));
Eric Smith0923d1d2009-04-16 20:16:10 +0000529 if (toupper) {
530 Py_ssize_t t;
531 for (t = 0; t < spec->n_prefix; ++t)
532 buf[t] = STRINGLIB_TOUPPER(buf[t]);
533 }
534 buf += spec->n_prefix;
Eric Smithd68af8f2008-07-16 00:15:35 +0000535 }
Eric Smith8c663262007-08-25 02:26:07 +0000536 if (spec->n_spadding) {
Eric Smith0923d1d2009-04-16 20:16:10 +0000537 STRINGLIB_FILL(buf, fill_char, spec->n_spadding);
538 buf += spec->n_spadding;
Eric Smith8c663262007-08-25 02:26:07 +0000539 }
Eric Smith0923d1d2009-04-16 20:16:10 +0000540
541 /* Only for type 'c' special case, it has no digits. */
542 if (spec->n_digits != 0) {
543 /* Fill the digits with InsertThousandsGrouping. */
544#ifndef NDEBUG
545 r =
546#endif
547 STRINGLIB_GROUPING(buf, spec->n_grouped_digits, digits,
548 spec->n_digits, spec->n_min_width,
549 locale->grouping, locale->thousands_sep);
550#ifndef NDEBUG
551 assert(r == spec->n_grouped_digits);
552#endif
553 p += spec->n_digits;
Eric Smith8c663262007-08-25 02:26:07 +0000554 }
Eric Smith0923d1d2009-04-16 20:16:10 +0000555 if (toupper) {
556 Py_ssize_t t;
557 for (t = 0; t < spec->n_grouped_digits; ++t)
558 buf[t] = STRINGLIB_TOUPPER(buf[t]);
559 }
560 buf += spec->n_grouped_digits;
561
562 if (spec->n_decimal) {
563 Py_ssize_t t;
564 for (t = 0; t < spec->n_decimal; ++t)
565 buf[t] = locale->decimal_point[t];
566 buf += spec->n_decimal;
567 p += 1;
568 }
569
570 if (spec->n_remainder) {
571 memcpy(buf, p, spec->n_remainder * sizeof(STRINGLIB_CHAR));
572 buf += spec->n_remainder;
573 p += spec->n_remainder;
574 }
575
Eric Smith8c663262007-08-25 02:26:07 +0000576 if (spec->n_rpadding) {
Eric Smith0923d1d2009-04-16 20:16:10 +0000577 STRINGLIB_FILL(buf, fill_char, spec->n_rpadding);
578 buf += spec->n_rpadding;
Eric Smith8c663262007-08-25 02:26:07 +0000579 }
Eric Smith8c663262007-08-25 02:26:07 +0000580}
Eric Smith0923d1d2009-04-16 20:16:10 +0000581
582static char no_grouping[1] = {CHAR_MAX};
583
584/* Find the decimal point character(s?), thousands_separator(s?), and
585 grouping description, either for the current locale if type is
586 LT_CURRENT_LOCALE, a hard-coded locale if LT_DEFAULT_LOCALE, or
587 none if LT_NO_LOCALE. */
588static void
589get_locale_info(int type, LocaleInfo *locale_info)
590{
591 switch (type) {
592 case LT_CURRENT_LOCALE: {
593 struct lconv *locale_data = localeconv();
594 locale_info->decimal_point = locale_data->decimal_point;
595 locale_info->thousands_sep = locale_data->thousands_sep;
596 locale_info->grouping = locale_data->grouping;
597 break;
598 }
599 case LT_DEFAULT_LOCALE:
600 locale_info->decimal_point = ".";
601 locale_info->thousands_sep = ",";
602 locale_info->grouping = "\3"; /* Group every 3 characters,
603 trailing 0 means repeat
604 infinitely. */
605 break;
606 case LT_NO_LOCALE:
607 locale_info->decimal_point = ".";
608 locale_info->thousands_sep = "";
609 locale_info->grouping = no_grouping;
610 break;
611 default:
612 assert(0);
613 }
614}
615
Eric Smith58a42242009-04-30 01:00:33 +0000616#endif /* FORMAT_FLOAT || FORMAT_LONG || FORMAT_COMPLEX */
Eric Smith8c663262007-08-25 02:26:07 +0000617
618/************************************************************************/
619/*********** string formatting ******************************************/
620/************************************************************************/
621
622static PyObject *
623format_string_internal(PyObject *value, const InternalFormatSpec *format)
624{
Eric Smith8c663262007-08-25 02:26:07 +0000625 Py_ssize_t lpad;
Eric Smith58a42242009-04-30 01:00:33 +0000626 Py_ssize_t rpad;
627 Py_ssize_t total;
628 STRINGLIB_CHAR *p;
Eric Smith8c663262007-08-25 02:26:07 +0000629 Py_ssize_t len = STRINGLIB_LEN(value);
630 PyObject *result = NULL;
631
632 /* sign is not allowed on strings */
633 if (format->sign != '\0') {
634 PyErr_SetString(PyExc_ValueError,
635 "Sign not allowed in string format specifier");
636 goto done;
637 }
638
Eric Smithb1ebcc62008-07-15 13:02:41 +0000639 /* alternate is not allowed on strings */
640 if (format->alternate) {
641 PyErr_SetString(PyExc_ValueError,
642 "Alternate form (#) not allowed in string format "
Eric Smithf8c8b6d2009-04-03 11:19:31 +0000643 "specifier");
Eric Smithb1ebcc62008-07-15 13:02:41 +0000644 goto done;
645 }
646
Eric Smith8c663262007-08-25 02:26:07 +0000647 /* '=' alignment not allowed on strings */
648 if (format->align == '=') {
649 PyErr_SetString(PyExc_ValueError,
650 "'=' alignment not allowed "
651 "in string format specifier");
652 goto done;
653 }
654
655 /* if precision is specified, output no more that format.precision
656 characters */
657 if (format->precision >= 0 && len >= format->precision) {
658 len = format->precision;
659 }
660
Eric Smith58a42242009-04-30 01:00:33 +0000661 calc_padding(len, format->width, format->align, &lpad, &rpad, &total);
Eric Smith8c663262007-08-25 02:26:07 +0000662
663 /* allocate the resulting string */
Eric Smith58a42242009-04-30 01:00:33 +0000664 result = STRINGLIB_NEW(NULL, total);
Eric Smith8c663262007-08-25 02:26:07 +0000665 if (result == NULL)
666 goto done;
667
Eric Smith58a42242009-04-30 01:00:33 +0000668 /* Write into that space. First the padding. */
669 p = fill_padding(STRINGLIB_STR(result), len,
670 format->fill_char=='\0'?' ':format->fill_char,
671 lpad, rpad);
Eric Smith8c663262007-08-25 02:26:07 +0000672
Eric Smith58a42242009-04-30 01:00:33 +0000673 /* Then the source string. */
674 memcpy(p, STRINGLIB_STR(value), len * sizeof(STRINGLIB_CHAR));
Eric Smith8c663262007-08-25 02:26:07 +0000675
676done:
677 return result;
678}
679
680
681/************************************************************************/
682/*********** long formatting ********************************************/
683/************************************************************************/
684
#if defined FORMAT_LONG || defined FORMAT_INT
/* Callback that renders value as a string object in the given base. */
typedef PyObject*
(*IntOrLongToString)(PyObject *value, int base);

/* Format the integer object value according to format, using tostring
   to obtain its digits.

   A precision is always rejected.  Type 'c' formats the value as a
   single character (no sign or thousands separators allowed; the
   value must fit the build's character range).  Types 'b', 'o', 'x'
   and 'X' select bases 2, 8 and 16; every other type is rendered in
   base 10, with 'n' additionally taking grouping from the current
   locale.

   Returns a new string object, or NULL with an exception set. */
static PyObject *
format_int_or_long_internal(PyObject *value, const InternalFormatSpec *format,
                            IntOrLongToString tostring)
{
    PyObject *result = NULL;
    PyObject *tmp = NULL;           /* string produced by tostring() */
    STRINGLIB_CHAR *num_chars;      /* first char to be formatted */
    STRINGLIB_CHAR single_char;     /* storage for the 'c' case */
    STRINGLIB_CHAR sign_char = '\0';
    STRINGLIB_CHAR *prefix = NULL;  /* start of e.g. '0x' within tmp */
    STRINGLIB_CHAR fill;
    Py_ssize_t n_chars;             /* count of chars at num_chars */
    Py_ssize_t n_remainder = 0;     /* Used only for 'c' formatting, which
                                       produces non-digits */
    Py_ssize_t n_prefix = 0;        /* Count of prefix chars, (e.g., '0x') */
    Py_ssize_t n_total;
    NumberFieldWidths spec;
    long x;
    int locale_type;

    /* Locale settings, either from the actual locale or from a
       hard-coded pseudo-locale. */
    LocaleInfo locale;

    /* Integers never take a precision. */
    if (format->precision != -1) {
        PyErr_SetString(PyExc_ValueError,
                        "Precision not allowed in integer format specifier");
        return NULL;
    }

    if (format->type == 'c') {
        /* Character formatting is its own special case. */

        /* A sign is meaningless here. */
        if (format->sign != '\0') {
            PyErr_SetString(PyExc_ValueError,
                            "Sign not allowed with integer"
                            " format specifier 'c'");
            return NULL;
        }

        /* So is a comma. */
        if (format->thousands_separators) {
            PyErr_SetString(PyExc_ValueError,
                            "Thousands separators not allowed with integer"
                            " format specifier 'c'");
            return NULL;
        }

        /* taken from unicodeobject.c formatchar() */
        /* Integer input truncated to a character */
/* XXX: won't work for int */
        x = PyLong_AsLong(value);
        if (x == -1 && PyErr_Occurred())
            return NULL;
#ifdef Py_UNICODE_WIDE
        if (x < 0 || x > 0x10ffff) {
            PyErr_SetString(PyExc_OverflowError,
                            "%c arg not in range(0x110000) "
                            "(wide Python build)");
            return NULL;
        }
#else
        if (x < 0 || x > 0xffff) {
            PyErr_SetString(PyExc_OverflowError,
                            "%c arg not in range(0x10000) "
                            "(narrow Python build)");
            return NULL;
        }
#endif
        single_char = (STRINGLIB_CHAR)x;
        num_chars = &single_char;
        n_chars = 1;

        /* As a sort-of hack, we tell calc_number_widths that we only
           have "remainder" characters.  calc_number_widths treats
           those as characters that are copied into the output
           untouched, which is what we want for 'c' because the
           character is likely to be a non-digit. */
        n_remainder = 1;
    }
    else {
        int base = 10;      /* 'd', 'n' and anything unrecognised */
        int n_skip = 0;     /* Number of leading characters produced by
                               tostring() that we want to skip over. */

        /* Pick the base, and with it the size of the '0b'/'0o'/'0x'
           marker the conversion will emit. */
        switch (format->type) {
        case 'b':
            base = 2;
            n_skip = 2;     /* 0b */
            break;
        case 'o':
            base = 8;
            n_skip = 2;     /* 0o */
            break;
        case 'x':
        case 'X':
            base = 16;
            n_skip = 2;     /* 0x */
            break;
        default:
            break;
        }

        /* In alternate mode the marker is kept as the prefix; it is
           exactly as long as the chars we would otherwise skip. */
        if (format->alternate)
            n_prefix = n_skip;

        /* Do the hard part, converting to a string in a given base */
        tmp = tostring(value, base);
        if (tmp == NULL)
            goto done;

        num_chars = STRINGLIB_STR(tmp);
        n_chars = STRINGLIB_LEN(tmp);

        prefix = num_chars;

        /* Remember not to modify what num_chars points to.  It might
           be interned.  Only modify it after we copy it into a newly
           allocated output buffer. */

        /* If the conversion produced a leading '-', remember it and
           step over it. */
        if (num_chars[0] == '-') {
            sign_char = num_chars[0];
            ++prefix;
            ++n_skip;
        }

        /* Skip over the leading chars (0x, 0b, etc.) */
        n_chars -= n_skip;
        num_chars += n_skip;
    }

    /* Determine the grouping, separator, and decimal point, if any. */
    if (format->type == 'n')
        locale_type = LT_CURRENT_LOCALE;
    else if (format->thousands_separators)
        locale_type = LT_DEFAULT_LOCALE;
    else
        locale_type = LT_NO_LOCALE;
    get_locale_info(locale_type, &locale);

    /* Calculate how much memory we'll need. */
    n_total = calc_number_widths(&spec, n_prefix, sign_char, num_chars,
                                 n_chars, n_remainder, 0, &locale, format);

    /* Allocate the memory. */
    result = STRINGLIB_NEW(NULL, n_total);
    if (!result)
        goto done;

    /* Populate the memory. */
    fill = format->fill_char == '\0' ? ' ' : format->fill_char;
    fill_number(STRINGLIB_STR(result), &spec, num_chars, n_chars,
                prefix, fill, &locale, format->type == 'X');

done:
    Py_XDECREF(tmp);
    return result;
}
#endif /* defined FORMAT_LONG || defined FORMAT_INT */
Eric Smith8c663262007-08-25 02:26:07 +0000856
857/************************************************************************/
858/*********** float formatting *******************************************/
859/************************************************************************/
860
Eric Smith8fd3eba2008-02-17 19:48:00 +0000861#ifdef FORMAT_FLOAT
862#if STRINGLIB_IS_UNICODE
Eric Smith0923d1d2009-04-16 20:16:10 +0000863static void
864strtounicode(Py_UNICODE *buffer, const char *charbuffer, Py_ssize_t len)
Eric Smith8c663262007-08-25 02:26:07 +0000865{
Eric Smith0923d1d2009-04-16 20:16:10 +0000866 Py_ssize_t i;
867 for (i = 0; i < len; ++i)
868 buffer[i] = (Py_UNICODE)charbuffer[i];
Eric Smith8c663262007-08-25 02:26:07 +0000869}
Eric Smith8fd3eba2008-02-17 19:48:00 +0000870#endif
Eric Smith8c663262007-08-25 02:26:07 +0000871
Eric Smith8c663262007-08-25 02:26:07 +0000872/* much of this is taken from unicodeobject.c */
Eric Smith8c663262007-08-25 02:26:07 +0000873static PyObject *
Christian Heimesc3f30c42008-02-22 16:37:40 +0000874format_float_internal(PyObject *value,
Eric Smithf8c8b6d2009-04-03 11:19:31 +0000875 const InternalFormatSpec *format)
Eric Smith8c663262007-08-25 02:26:07 +0000876{
Eric Smith0923d1d2009-04-16 20:16:10 +0000877 char *buf = NULL; /* buffer returned from PyOS_double_to_string */
Eric Smith8c663262007-08-25 02:26:07 +0000878 Py_ssize_t n_digits;
Eric Smith0923d1d2009-04-16 20:16:10 +0000879 Py_ssize_t n_remainder;
880 Py_ssize_t n_total;
881 int has_decimal;
882 double val;
Eric Smith8c663262007-08-25 02:26:07 +0000883 Py_ssize_t precision = format->precision;
Eric Smith0923d1d2009-04-16 20:16:10 +0000884 STRINGLIB_CHAR type = format->type;
885 int add_pct = 0;
Eric Smith8c663262007-08-25 02:26:07 +0000886 STRINGLIB_CHAR *p;
887 NumberFieldWidths spec;
Eric Smith0923d1d2009-04-16 20:16:10 +0000888 int flags = 0;
889 PyObject *result = NULL;
890 STRINGLIB_CHAR sign_char = '\0';
891 int float_type; /* Used to see if we have a nan, inf, or regular float. */
Eric Smith8c663262007-08-25 02:26:07 +0000892
893#if STRINGLIB_IS_UNICODE
Eric Smith0923d1d2009-04-16 20:16:10 +0000894 Py_UNICODE *unicode_tmp = NULL;
Eric Smith8c663262007-08-25 02:26:07 +0000895#endif
896
Eric Smith0923d1d2009-04-16 20:16:10 +0000897 /* Locale settings, either from the actual locale or
898 from a hard-code pseudo-locale */
899 LocaleInfo locale;
900
901 /* Alternate is not allowed on floats. */
Eric Smithb1ebcc62008-07-15 13:02:41 +0000902 if (format->alternate) {
903 PyErr_SetString(PyExc_ValueError,
904 "Alternate form (#) not allowed in float format "
Eric Smithf8c8b6d2009-04-03 11:19:31 +0000905 "specifier");
Eric Smithb1ebcc62008-07-15 13:02:41 +0000906 goto done;
907 }
908
Eric Smith0923d1d2009-04-16 20:16:10 +0000909 if (type == '\0') {
910 /* Omitted type specifier. This is like 'g' but with at least
911 one digit after the decimal point. */
912 type = 'g';
913 flags |= Py_DTSF_ADD_DOT_0;
914 }
915
916 if (type == 'n')
917 /* 'n' is the same as 'g', except for the locale used to
918 format the result. We take care of that later. */
919 type = 'g';
Eric Smith8c663262007-08-25 02:26:07 +0000920
Eric Smith22b85b32008-07-17 19:18:29 +0000921 /* 'F' is the same as 'f', per the PEP */
922 if (type == 'F')
923 type = 'f';
924
Eric Smith0923d1d2009-04-16 20:16:10 +0000925 val = PyFloat_AsDouble(value);
926 if (val == -1.0 && PyErr_Occurred())
Eric Smith185e30c2007-08-30 22:23:08 +0000927 goto done;
Eric Smith8c663262007-08-25 02:26:07 +0000928
929 if (type == '%') {
930 type = 'f';
Eric Smith0923d1d2009-04-16 20:16:10 +0000931 val *= 100;
932 add_pct = 1;
Eric Smith8c663262007-08-25 02:26:07 +0000933 }
934
935 if (precision < 0)
Eric Smith185e30c2007-08-30 22:23:08 +0000936 precision = 6;
Eric Smith8c663262007-08-25 02:26:07 +0000937
Eric Smith7255f182009-05-02 12:15:39 +0000938#if PY_VERSION_HEX < 0x03010000
939 /* 3.1 no longer converts large 'f' to 'g'. */
940 if ((type == 'f' || type == 'F') && fabs(val) >= 1e50)
941 type = 'g';
942#endif
943
Eric Smith0923d1d2009-04-16 20:16:10 +0000944 /* Cast "type", because if we're in unicode we need to pass a
945 8-bit char. This is safe, because we've restricted what "type"
946 can be. */
947 buf = PyOS_double_to_string(val, (char)type, precision, flags,
948 &float_type);
949 if (buf == NULL)
950 goto done;
951 n_digits = strlen(buf);
Eric Smith8c663262007-08-25 02:26:07 +0000952
Eric Smith0923d1d2009-04-16 20:16:10 +0000953 if (add_pct) {
954 /* We know that buf has a trailing zero (since we just called
955 strlen() on it), and we don't use that fact any more. So we
956 can just write over the trailing zero. */
957 buf[n_digits] = '%';
958 n_digits += 1;
959 }
Eric Smith8c663262007-08-25 02:26:07 +0000960
Eric Smith0923d1d2009-04-16 20:16:10 +0000961 /* Since there is no unicode version of PyOS_double_to_string,
962 just use the 8 bit version and then convert to unicode. */
Eric Smith8c663262007-08-25 02:26:07 +0000963#if STRINGLIB_IS_UNICODE
Eric Smith0923d1d2009-04-16 20:16:10 +0000964 unicode_tmp = (Py_UNICODE*)PyMem_Malloc((n_digits)*sizeof(Py_UNICODE));
965 if (unicode_tmp == NULL) {
966 PyErr_NoMemory();
967 goto done;
968 }
969 strtounicode(unicode_tmp, buf, n_digits);
970 p = unicode_tmp;
Eric Smith8c663262007-08-25 02:26:07 +0000971#else
Eric Smith0923d1d2009-04-16 20:16:10 +0000972 p = buf;
Eric Smith8c663262007-08-25 02:26:07 +0000973#endif
974
Eric Smith0923d1d2009-04-16 20:16:10 +0000975 /* Is a sign character present in the output? If so, remember it
Eric Smith8c663262007-08-25 02:26:07 +0000976 and skip it */
Eric Smith0923d1d2009-04-16 20:16:10 +0000977 if (*p == '-') {
978 sign_char = *p;
Christian Heimesc3f30c42008-02-22 16:37:40 +0000979 ++p;
980 --n_digits;
Eric Smith8c663262007-08-25 02:26:07 +0000981 }
982
Eric Smith0923d1d2009-04-16 20:16:10 +0000983 /* Determine if we have any "remainder" (after the digits, might include
984 decimal or exponent or both (or neither)) */
985 parse_number(p, n_digits, &n_remainder, &has_decimal);
Eric Smith8c663262007-08-25 02:26:07 +0000986
Eric Smith0923d1d2009-04-16 20:16:10 +0000987 /* Determine the grouping, separator, and decimal point, if any. */
988 get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
989 (format->thousands_separators ?
990 LT_DEFAULT_LOCALE :
991 LT_NO_LOCALE),
992 &locale);
993
994 /* Calculate how much memory we'll need. */
995 n_total = calc_number_widths(&spec, 0, sign_char, p, n_digits,
996 n_remainder, has_decimal, &locale, format);
997
998 /* Allocate the memory. */
999 result = STRINGLIB_NEW(NULL, n_total);
Eric Smith8c663262007-08-25 02:26:07 +00001000 if (result == NULL)
1001 goto done;
1002
Eric Smith0923d1d2009-04-16 20:16:10 +00001003 /* Populate the memory. */
1004 fill_number(STRINGLIB_STR(result), &spec, p, n_digits, NULL,
1005 format->fill_char == '\0' ? ' ' : format->fill_char, &locale,
1006 0);
Eric Smith8c663262007-08-25 02:26:07 +00001007
1008done:
Eric Smith0923d1d2009-04-16 20:16:10 +00001009 PyMem_Free(buf);
1010#if STRINGLIB_IS_UNICODE
1011 PyMem_Free(unicode_tmp);
1012#endif
Eric Smith8c663262007-08-25 02:26:07 +00001013 return result;
1014}
Eric Smith8fd3eba2008-02-17 19:48:00 +00001015#endif /* FORMAT_FLOAT */
Eric Smith8c663262007-08-25 02:26:07 +00001016
1017/************************************************************************/
Eric Smith58a42242009-04-30 01:00:33 +00001018/*********** complex formatting *****************************************/
1019/************************************************************************/
1020
1021#ifdef FORMAT_COMPLEX
1022
1023static PyObject *
1024format_complex_internal(PyObject *value,
1025 const InternalFormatSpec *format)
1026{
1027 double re;
1028 double im;
1029 char *re_buf = NULL; /* buffer returned from PyOS_double_to_string */
1030 char *im_buf = NULL; /* buffer returned from PyOS_double_to_string */
1031
1032 InternalFormatSpec tmp_format = *format;
1033 Py_ssize_t n_re_digits;
1034 Py_ssize_t n_im_digits;
1035 Py_ssize_t n_re_remainder;
1036 Py_ssize_t n_im_remainder;
1037 Py_ssize_t n_re_total;
1038 Py_ssize_t n_im_total;
1039 int re_has_decimal;
1040 int im_has_decimal;
1041 Py_ssize_t precision = format->precision;
1042 STRINGLIB_CHAR type = format->type;
1043 STRINGLIB_CHAR *p_re;
1044 STRINGLIB_CHAR *p_im;
1045 NumberFieldWidths re_spec;
1046 NumberFieldWidths im_spec;
1047 int flags = 0;
1048 PyObject *result = NULL;
1049 STRINGLIB_CHAR *p;
1050 STRINGLIB_CHAR re_sign_char = '\0';
1051 STRINGLIB_CHAR im_sign_char = '\0';
1052 int re_float_type; /* Used to see if we have a nan, inf, or regular float. */
1053 int im_float_type;
1054 int add_parens = 0;
1055 int skip_re = 0;
1056 Py_ssize_t lpad;
1057 Py_ssize_t rpad;
1058 Py_ssize_t total;
1059
1060#if STRINGLIB_IS_UNICODE
1061 Py_UNICODE *re_unicode_tmp = NULL;
1062 Py_UNICODE *im_unicode_tmp = NULL;
1063#endif
1064
1065 /* Locale settings, either from the actual locale or
1066 from a hard-code pseudo-locale */
1067 LocaleInfo locale;
1068
1069 /* Alternate is not allowed on complex. */
1070 if (format->alternate) {
1071 PyErr_SetString(PyExc_ValueError,
1072 "Alternate form (#) not allowed in complex format "
1073 "specifier");
1074 goto done;
1075 }
1076
1077 /* Neither is zero pading. */
1078 if (format->fill_char == '0') {
1079 PyErr_SetString(PyExc_ValueError,
1080 "Zero padding is not allowed in complex format "
1081 "specifier");
1082 goto done;
1083 }
1084
1085 /* Neither is '=' alignment . */
1086 if (format->align == '=') {
1087 PyErr_SetString(PyExc_ValueError,
1088 "'=' alignment flag is not allowed in complex format "
1089 "specifier");
1090 goto done;
1091 }
1092
1093 re = PyComplex_RealAsDouble(value);
1094 if (re == -1.0 && PyErr_Occurred())
1095 goto done;
1096 im = PyComplex_ImagAsDouble(value);
1097 if (im == -1.0 && PyErr_Occurred())
1098 goto done;
1099
1100 if (type == '\0') {
1101 /* Omitted type specifier. Should be like str(self). */
1102 type = 'g';
1103 add_parens = 1;
1104 if (re == 0.0)
1105 skip_re = 1;
1106 }
1107
1108 if (type == 'n')
1109 /* 'n' is the same as 'g', except for the locale used to
1110 format the result. We take care of that later. */
1111 type = 'g';
1112
1113 /* 'F' is the same as 'f', per the PEP */
1114 if (type == 'F')
1115 type = 'f';
1116
1117 if (precision < 0)
1118 precision = 6;
1119
1120 /* Cast "type", because if we're in unicode we need to pass a
1121 8-bit char. This is safe, because we've restricted what "type"
1122 can be. */
1123 re_buf = PyOS_double_to_string(re, (char)type, precision, flags,
1124 &re_float_type);
1125 if (re_buf == NULL)
1126 goto done;
1127 im_buf = PyOS_double_to_string(im, (char)type, precision, flags,
1128 &im_float_type);
1129 if (im_buf == NULL)
1130 goto done;
1131
1132 n_re_digits = strlen(re_buf);
1133 n_im_digits = strlen(im_buf);
1134
1135 /* Since there is no unicode version of PyOS_double_to_string,
1136 just use the 8 bit version and then convert to unicode. */
1137#if STRINGLIB_IS_UNICODE
1138 re_unicode_tmp = (Py_UNICODE*)PyMem_Malloc((n_re_digits)*sizeof(Py_UNICODE));
1139 if (re_unicode_tmp == NULL) {
1140 PyErr_NoMemory();
1141 goto done;
1142 }
1143 strtounicode(re_unicode_tmp, re_buf, n_re_digits);
1144 p_re = re_unicode_tmp;
1145
1146 im_unicode_tmp = (Py_UNICODE*)PyMem_Malloc((n_im_digits)*sizeof(Py_UNICODE));
1147 if (im_unicode_tmp == NULL) {
1148 PyErr_NoMemory();
1149 goto done;
1150 }
1151 strtounicode(im_unicode_tmp, im_buf, n_im_digits);
1152 p_im = im_unicode_tmp;
1153#else
1154 p_re = re_buf;
1155 p_im = im_buf;
1156#endif
1157
1158 /* Is a sign character present in the output? If so, remember it
1159 and skip it */
1160 if (*p_re == '-') {
1161 re_sign_char = *p_re;
1162 ++p_re;
1163 --n_re_digits;
1164 }
1165 if (*p_im == '-') {
1166 im_sign_char = *p_im;
1167 ++p_im;
1168 --n_im_digits;
1169 }
1170
1171 /* Determine if we have any "remainder" (after the digits, might include
1172 decimal or exponent or both (or neither)) */
1173 parse_number(p_re, n_re_digits, &n_re_remainder, &re_has_decimal);
1174 parse_number(p_im, n_im_digits, &n_im_remainder, &im_has_decimal);
1175
1176 /* Determine the grouping, separator, and decimal point, if any. */
1177 get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
1178 (format->thousands_separators ?
1179 LT_DEFAULT_LOCALE :
1180 LT_NO_LOCALE),
1181 &locale);
1182
1183 /* Turn off any padding. We'll do it later after we've composed
1184 the numbers without padding. */
1185 tmp_format.fill_char = '\0';
1186 tmp_format.align = '\0';
1187 tmp_format.width = -1;
1188
1189 /* Calculate how much memory we'll need. */
1190 n_re_total = calc_number_widths(&re_spec, 0, re_sign_char, p_re,
1191 n_re_digits, n_re_remainder,
1192 re_has_decimal, &locale, &tmp_format);
1193
1194 /* Same formatting, but always include a sign. */
1195 tmp_format.sign = '+';
1196 n_im_total = calc_number_widths(&im_spec, 0, im_sign_char, p_im,
1197 n_im_digits, n_im_remainder,
1198 im_has_decimal, &locale, &tmp_format);
1199
1200 if (skip_re)
1201 n_re_total = 0;
1202
1203 /* Add 1 for the 'j', and optionally 2 for parens. */
1204 calc_padding(n_re_total + n_im_total + 1 + add_parens * 2,
1205 format->width, format->align, &lpad, &rpad, &total);
1206
1207 result = STRINGLIB_NEW(NULL, total);
1208 if (result == NULL)
1209 goto done;
1210
1211 /* Populate the memory. First, the padding. */
1212 p = fill_padding(STRINGLIB_STR(result),
1213 n_re_total + n_im_total + 1 + add_parens * 2,
1214 format->fill_char=='\0' ? ' ' : format->fill_char,
1215 lpad, rpad);
1216
1217 if (add_parens)
1218 *p++ = '(';
1219
1220 if (!skip_re) {
1221 fill_number(p, &re_spec, p_re, n_re_digits, NULL, 0, &locale, 0);
1222 p += n_re_total;
1223 }
1224 fill_number(p, &im_spec, p_im, n_im_digits, NULL, 0, &locale, 0);
1225 p += n_im_total;
1226 *p++ = 'j';
1227
1228 if (add_parens)
1229 *p++ = ')';
1230
1231done:
1232 PyMem_Free(re_buf);
1233 PyMem_Free(im_buf);
1234#if STRINGLIB_IS_UNICODE
1235 PyMem_Free(re_unicode_tmp);
1236 PyMem_Free(im_unicode_tmp);
1237#endif
1238 return result;
1239}
1240#endif /* FORMAT_COMPLEX */
1241
1242/************************************************************************/
Eric Smith8c663262007-08-25 02:26:07 +00001243/*********** built in formatters ****************************************/
1244/************************************************************************/
Eric Smith8c663262007-08-25 02:26:07 +00001245PyObject *
Eric Smith4a7d76d2008-05-30 18:10:19 +00001246FORMAT_STRING(PyObject *obj,
Eric Smithf8c8b6d2009-04-03 11:19:31 +00001247 STRINGLIB_CHAR *format_spec,
1248 Py_ssize_t format_spec_len)
Eric Smith8c663262007-08-25 02:26:07 +00001249{
Eric Smith8c663262007-08-25 02:26:07 +00001250 InternalFormatSpec format;
Eric Smith4a7d76d2008-05-30 18:10:19 +00001251 PyObject *result = NULL;
Eric Smith8c663262007-08-25 02:26:07 +00001252
1253 /* check for the special case of zero length format spec, make
Eric Smith4a7d76d2008-05-30 18:10:19 +00001254 it equivalent to str(obj) */
1255 if (format_spec_len == 0) {
1256 result = STRINGLIB_TOSTR(obj);
Eric Smith8c663262007-08-25 02:26:07 +00001257 goto done;
1258 }
1259
1260 /* parse the format_spec */
Eric Smith4a7d76d2008-05-30 18:10:19 +00001261 if (!parse_internal_render_format_spec(format_spec, format_spec_len,
Eric Smithf8c8b6d2009-04-03 11:19:31 +00001262 &format, 's'))
Eric Smith8c663262007-08-25 02:26:07 +00001263 goto done;
1264
1265 /* type conversion? */
1266 switch (format.type) {
1267 case 's':
1268 /* no type conversion needed, already a string. do the formatting */
Eric Smith4a7d76d2008-05-30 18:10:19 +00001269 result = format_string_internal(obj, &format);
Eric Smith8c663262007-08-25 02:26:07 +00001270 break;
Eric Smith8c663262007-08-25 02:26:07 +00001271 default:
1272 /* unknown */
Eric Smith5e5c0db2009-02-20 14:25:03 +00001273 unknown_presentation_type(format.type, obj->ob_type->tp_name);
Eric Smith8c663262007-08-25 02:26:07 +00001274 goto done;
1275 }
1276
1277done:
Eric Smith8c663262007-08-25 02:26:07 +00001278 return result;
1279}
1280
Eric Smith8fd3eba2008-02-17 19:48:00 +00001281#if defined FORMAT_LONG || defined FORMAT_INT
1282static PyObject*
Eric Smith4a7d76d2008-05-30 18:10:19 +00001283format_int_or_long(PyObject* obj,
Eric Smithf8c8b6d2009-04-03 11:19:31 +00001284 STRINGLIB_CHAR *format_spec,
1285 Py_ssize_t format_spec_len,
1286 IntOrLongToString tostring)
Eric Smith8c663262007-08-25 02:26:07 +00001287{
Eric Smith8c663262007-08-25 02:26:07 +00001288 PyObject *result = NULL;
1289 PyObject *tmp = NULL;
1290 InternalFormatSpec format;
1291
Eric Smith8c663262007-08-25 02:26:07 +00001292 /* check for the special case of zero length format spec, make
Eric Smith4a7d76d2008-05-30 18:10:19 +00001293 it equivalent to str(obj) */
1294 if (format_spec_len == 0) {
1295 result = STRINGLIB_TOSTR(obj);
Eric Smith8c663262007-08-25 02:26:07 +00001296 goto done;
1297 }
1298
1299 /* parse the format_spec */
Eric Smith4a7d76d2008-05-30 18:10:19 +00001300 if (!parse_internal_render_format_spec(format_spec,
Eric Smithf8c8b6d2009-04-03 11:19:31 +00001301 format_spec_len,
1302 &format, 'd'))
Eric Smith8c663262007-08-25 02:26:07 +00001303 goto done;
1304
1305 /* type conversion? */
1306 switch (format.type) {
Eric Smith8c663262007-08-25 02:26:07 +00001307 case 'b':
1308 case 'c':
1309 case 'd':
1310 case 'o':
1311 case 'x':
1312 case 'X':
Eric Smith5807c412008-05-11 21:00:57 +00001313 case 'n':
Eric Smith8fd3eba2008-02-17 19:48:00 +00001314 /* no type conversion needed, already an int (or long). do
Eric Smithf8c8b6d2009-04-03 11:19:31 +00001315 the formatting */
1316 result = format_int_or_long_internal(obj, &format, tostring);
Eric Smith8c663262007-08-25 02:26:07 +00001317 break;
1318
Eric Smithfa767ef2008-01-28 10:59:27 +00001319 case 'e':
1320 case 'E':
1321 case 'f':
1322 case 'F':
1323 case 'g':
1324 case 'G':
Eric Smithfa767ef2008-01-28 10:59:27 +00001325 case '%':
1326 /* convert to float */
Eric Smith4a7d76d2008-05-30 18:10:19 +00001327 tmp = PyNumber_Float(obj);
Eric Smithfa767ef2008-01-28 10:59:27 +00001328 if (tmp == NULL)
1329 goto done;
Eric Smithf64bce82009-04-13 00:50:23 +00001330 result = format_float_internal(tmp, &format);
Eric Smithfa767ef2008-01-28 10:59:27 +00001331 break;
1332
Eric Smith8c663262007-08-25 02:26:07 +00001333 default:
1334 /* unknown */
Eric Smith5e5c0db2009-02-20 14:25:03 +00001335 unknown_presentation_type(format.type, obj->ob_type->tp_name);
Eric Smith8c663262007-08-25 02:26:07 +00001336 goto done;
1337 }
1338
1339done:
1340 Py_XDECREF(tmp);
1341 return result;
1342}
Eric Smith8fd3eba2008-02-17 19:48:00 +00001343#endif /* FORMAT_LONG || defined FORMAT_INT */
Eric Smith8c663262007-08-25 02:26:07 +00001344
Eric Smith8fd3eba2008-02-17 19:48:00 +00001345#ifdef FORMAT_LONG
1346/* Need to define long_format as a function that will convert a long
1347 to a string. In 3.0, _PyLong_Format has the correct signature. In
1348 2.x, we need to fudge a few parameters */
1349#if PY_VERSION_HEX >= 0x03000000
1350#define long_format _PyLong_Format
1351#else
1352static PyObject*
1353long_format(PyObject* value, int base)
1354{
1355 /* Convert to base, don't add trailing 'L', and use the new octal
1356 format. We already know this is a long object */
1357 assert(PyLong_Check(value));
1358 /* convert to base, don't add 'L', and use the new octal format */
1359 return _PyLong_Format(value, base, 0, 1);
1360}
1361#endif
1362
1363PyObject *
Eric Smith4a7d76d2008-05-30 18:10:19 +00001364FORMAT_LONG(PyObject *obj,
Eric Smithf8c8b6d2009-04-03 11:19:31 +00001365 STRINGLIB_CHAR *format_spec,
1366 Py_ssize_t format_spec_len)
Eric Smith8fd3eba2008-02-17 19:48:00 +00001367{
Eric Smith4a7d76d2008-05-30 18:10:19 +00001368 return format_int_or_long(obj, format_spec, format_spec_len,
Eric Smithf8c8b6d2009-04-03 11:19:31 +00001369 long_format);
Eric Smith8fd3eba2008-02-17 19:48:00 +00001370}
1371#endif /* FORMAT_LONG */
1372
1373#ifdef FORMAT_INT
1374/* this is only used for 2.x, not 3.0 */
1375static PyObject*
1376int_format(PyObject* value, int base)
1377{
1378 /* Convert to base, and use the new octal format. We already
1379 know this is an int object */
1380 assert(PyInt_Check(value));
1381 return _PyInt_Format((PyIntObject*)value, base, 1);
1382}
1383
1384PyObject *
Eric Smith4a7d76d2008-05-30 18:10:19 +00001385FORMAT_INT(PyObject *obj,
Eric Smithf8c8b6d2009-04-03 11:19:31 +00001386 STRINGLIB_CHAR *format_spec,
1387 Py_ssize_t format_spec_len)
Eric Smith8fd3eba2008-02-17 19:48:00 +00001388{
Eric Smith4a7d76d2008-05-30 18:10:19 +00001389 return format_int_or_long(obj, format_spec, format_spec_len,
Eric Smithf8c8b6d2009-04-03 11:19:31 +00001390 int_format);
Eric Smith8fd3eba2008-02-17 19:48:00 +00001391}
1392#endif /* FORMAT_INT */
1393
1394#ifdef FORMAT_FLOAT
Eric Smith8c663262007-08-25 02:26:07 +00001395PyObject *
Eric Smith4a7d76d2008-05-30 18:10:19 +00001396FORMAT_FLOAT(PyObject *obj,
Eric Smithf8c8b6d2009-04-03 11:19:31 +00001397 STRINGLIB_CHAR *format_spec,
1398 Py_ssize_t format_spec_len)
Eric Smith8c663262007-08-25 02:26:07 +00001399{
Eric Smith8c663262007-08-25 02:26:07 +00001400 PyObject *result = NULL;
Eric Smith8c663262007-08-25 02:26:07 +00001401 InternalFormatSpec format;
1402
Eric Smith8c663262007-08-25 02:26:07 +00001403 /* check for the special case of zero length format spec, make
Eric Smith4a7d76d2008-05-30 18:10:19 +00001404 it equivalent to str(obj) */
1405 if (format_spec_len == 0) {
1406 result = STRINGLIB_TOSTR(obj);
Eric Smith8c663262007-08-25 02:26:07 +00001407 goto done;
1408 }
1409
1410 /* parse the format_spec */
Eric Smith4a7d76d2008-05-30 18:10:19 +00001411 if (!parse_internal_render_format_spec(format_spec,
Eric Smithf8c8b6d2009-04-03 11:19:31 +00001412 format_spec_len,
1413 &format, '\0'))
Eric Smith8c663262007-08-25 02:26:07 +00001414 goto done;
1415
1416 /* type conversion? */
1417 switch (format.type) {
Eric Smith0923d1d2009-04-16 20:16:10 +00001418 case '\0': /* No format code: like 'g', but with at least one decimal. */
Eric Smith8c663262007-08-25 02:26:07 +00001419 case 'e':
1420 case 'E':
1421 case 'f':
1422 case 'F':
1423 case 'g':
1424 case 'G':
1425 case 'n':
1426 case '%':
1427 /* no conversion, already a float. do the formatting */
Eric Smith4a7d76d2008-05-30 18:10:19 +00001428 result = format_float_internal(obj, &format);
Eric Smith8c663262007-08-25 02:26:07 +00001429 break;
1430
1431 default:
1432 /* unknown */
Eric Smith5e5c0db2009-02-20 14:25:03 +00001433 unknown_presentation_type(format.type, obj->ob_type->tp_name);
Eric Smith8c663262007-08-25 02:26:07 +00001434 goto done;
1435 }
1436
1437done:
Eric Smith8c663262007-08-25 02:26:07 +00001438 return result;
1439}
Eric Smith8fd3eba2008-02-17 19:48:00 +00001440#endif /* FORMAT_FLOAT */
Eric Smith58a42242009-04-30 01:00:33 +00001441
1442#ifdef FORMAT_COMPLEX
1443PyObject *
1444FORMAT_COMPLEX(PyObject *obj,
1445 STRINGLIB_CHAR *format_spec,
1446 Py_ssize_t format_spec_len)
1447{
1448 PyObject *result = NULL;
1449 InternalFormatSpec format;
1450
1451 /* check for the special case of zero length format spec, make
1452 it equivalent to str(obj) */
1453 if (format_spec_len == 0) {
1454 result = STRINGLIB_TOSTR(obj);
1455 goto done;
1456 }
1457
1458 /* parse the format_spec */
1459 if (!parse_internal_render_format_spec(format_spec,
1460 format_spec_len,
1461 &format, '\0'))
1462 goto done;
1463
1464 /* type conversion? */
1465 switch (format.type) {
1466 case '\0': /* No format code: like 'g', but with at least one decimal. */
1467 case 'e':
1468 case 'E':
1469 case 'f':
1470 case 'F':
1471 case 'g':
1472 case 'G':
1473 case 'n':
1474 /* no conversion, already a complex. do the formatting */
1475 result = format_complex_internal(obj, &format);
1476 break;
1477
1478 default:
1479 /* unknown */
1480 unknown_presentation_type(format.type, obj->ob_type->tp_name);
1481 goto done;
1482 }
1483
1484done:
1485 return result;
1486}
1487#endif /* FORMAT_COMPLEX */