blob: f4a3ea3d58127c5caf3697d451b5b7364e4565db [file] [log] [blame]
/* Implements the string, long, float, and complex formatters; that is,
   string.__format__, etc. */
3
Eric Smith0923d1d2009-04-16 20:16:10 +00004#include <locale.h>
5
Eric Smith8c663262007-08-25 02:26:07 +00006/* Before including this, you must include either:
7 stringlib/unicodedefs.h
8 stringlib/stringdefs.h
9
10 Also, you should define the names:
11 FORMAT_STRING
12 FORMAT_LONG
13 FORMAT_FLOAT
Eric Smith58a42242009-04-30 01:00:33 +000014 FORMAT_COMPLEX
Eric Smith8c663262007-08-25 02:26:07 +000015 to be whatever you want the public names of these functions to
16 be. These are the only non-static functions defined here.
17*/
18
Eric Smith5e5c0db2009-02-20 14:25:03 +000019/* Raises an exception about an unknown presentation type for this
20 * type. */
21
22static void
23unknown_presentation_type(STRINGLIB_CHAR presentation_type,
24 const char* type_name)
25{
26#if STRINGLIB_IS_UNICODE
27 /* If STRINGLIB_CHAR is Py_UNICODE, %c might be out-of-range,
28 hence the two cases. If it is char, gcc complains that the
29 condition below is always true, hence the ifdef. */
30 if (presentation_type > 32 && presentation_type < 128)
31#endif
32 PyErr_Format(PyExc_ValueError,
33 "Unknown format code '%c' "
34 "for object of type '%.200s'",
Eric Smithbeddd702009-07-30 13:43:08 +000035 (char)presentation_type,
Eric Smith5e5c0db2009-02-20 14:25:03 +000036 type_name);
37#if STRINGLIB_IS_UNICODE
38 else
39 PyErr_Format(PyExc_ValueError,
40 "Unknown format code '\\x%x' "
41 "for object of type '%.200s'",
42 (unsigned int)presentation_type,
43 type_name);
44#endif
45}
46
Eric Smithbeddd702009-07-30 13:43:08 +000047static void
48invalid_comma_type(STRINGLIB_CHAR presentation_type)
49{
50#if STRINGLIB_IS_UNICODE
51 /* See comment in unknown_presentation_type */
52 if (presentation_type > 32 && presentation_type < 128)
53#endif
54 PyErr_Format(PyExc_ValueError,
55 "Cannot specify ',' with '%c'.",
56 (char)presentation_type);
57#if STRINGLIB_IS_UNICODE
58 else
59 PyErr_Format(PyExc_ValueError,
60 "Cannot specify ',' with '\\x%x'.",
61 (unsigned int)presentation_type);
62#endif
63}
64
Eric Smith8c663262007-08-25 02:26:07 +000065/*
66 get_integer consumes 0 or more decimal digit characters from an
67 input string, updates *result with the corresponding positive
68 integer, and returns the number of digits consumed.
69
70 returns -1 on error.
71*/
72static int
73get_integer(STRINGLIB_CHAR **ptr, STRINGLIB_CHAR *end,
74 Py_ssize_t *result)
75{
76 Py_ssize_t accumulator, digitval, oldaccumulator;
77 int numdigits;
78 accumulator = numdigits = 0;
79 for (;;(*ptr)++, numdigits++) {
80 if (*ptr >= end)
81 break;
82 digitval = STRINGLIB_TODECIMAL(**ptr);
83 if (digitval < 0)
84 break;
85 /*
86 This trick was copied from old Unicode format code. It's cute,
87 but would really suck on an old machine with a slow divide
88 implementation. Fortunately, in the normal case we do not
89 expect too many digits.
90 */
91 oldaccumulator = accumulator;
92 accumulator *= 10;
93 if ((accumulator+10)/10 != oldaccumulator+1) {
94 PyErr_Format(PyExc_ValueError,
95 "Too many decimal digits in format string");
96 return -1;
97 }
98 accumulator += digitval;
99 }
100 *result = accumulator;
101 return numdigits;
102}
103
104/************************************************************************/
105/*********** standard format specifier parsing **************************/
106/************************************************************************/
107
108/* returns true if this character is a specifier alignment token */
109Py_LOCAL_INLINE(int)
110is_alignment_token(STRINGLIB_CHAR c)
111{
112 switch (c) {
113 case '<': case '>': case '=': case '^':
114 return 1;
115 default:
116 return 0;
117 }
118}
119
120/* returns true if this character is a sign element */
121Py_LOCAL_INLINE(int)
122is_sign_element(STRINGLIB_CHAR c)
123{
124 switch (c) {
Eric Smithb7f5ba12007-08-29 12:38:45 +0000125 case ' ': case '+': case '-':
Eric Smith8c663262007-08-25 02:26:07 +0000126 return 1;
127 default:
128 return 0;
129 }
130}
131
132
133typedef struct {
134 STRINGLIB_CHAR fill_char;
135 STRINGLIB_CHAR align;
Eric Smithb1ebcc62008-07-15 13:02:41 +0000136 int alternate;
Eric Smith8c663262007-08-25 02:26:07 +0000137 STRINGLIB_CHAR sign;
138 Py_ssize_t width;
Eric Smitha3b1ac82009-04-03 14:45:06 +0000139 int thousands_separators;
Eric Smith8c663262007-08-25 02:26:07 +0000140 Py_ssize_t precision;
141 STRINGLIB_CHAR type;
142} InternalFormatSpec;
143
144/*
145 ptr points to the start of the format_spec, end points just past its end.
146 fills in format with the parsed information.
147 returns 1 on success, 0 on failure.
148 if failure, sets the exception
149*/
150static int
Eric Smith4a7d76d2008-05-30 18:10:19 +0000151parse_internal_render_format_spec(STRINGLIB_CHAR *format_spec,
Eric Smithf8c8b6d2009-04-03 11:19:31 +0000152 Py_ssize_t format_spec_len,
Eric Smith8c663262007-08-25 02:26:07 +0000153 InternalFormatSpec *format,
154 char default_type)
155{
Eric Smith4a7d76d2008-05-30 18:10:19 +0000156 STRINGLIB_CHAR *ptr = format_spec;
157 STRINGLIB_CHAR *end = format_spec + format_spec_len;
Eric Smith8c663262007-08-25 02:26:07 +0000158
159 /* end-ptr is used throughout this code to specify the length of
160 the input string */
161
Eric Smith0923d1d2009-04-16 20:16:10 +0000162 Py_ssize_t consumed;
Eric Smith8c663262007-08-25 02:26:07 +0000163
164 format->fill_char = '\0';
165 format->align = '\0';
Eric Smithb1ebcc62008-07-15 13:02:41 +0000166 format->alternate = 0;
Eric Smith8c663262007-08-25 02:26:07 +0000167 format->sign = '\0';
168 format->width = -1;
Eric Smitha3b1ac82009-04-03 14:45:06 +0000169 format->thousands_separators = 0;
Eric Smith8c663262007-08-25 02:26:07 +0000170 format->precision = -1;
171 format->type = default_type;
172
173 /* If the second char is an alignment token,
174 then parse the fill char */
175 if (end-ptr >= 2 && is_alignment_token(ptr[1])) {
176 format->align = ptr[1];
177 format->fill_char = ptr[0];
178 ptr += 2;
Eric Smith0cb431c2007-08-28 01:07:27 +0000179 }
180 else if (end-ptr >= 1 && is_alignment_token(ptr[0])) {
Eric Smith8c663262007-08-25 02:26:07 +0000181 format->align = ptr[0];
Christian Heimesc3f30c42008-02-22 16:37:40 +0000182 ++ptr;
Eric Smith8c663262007-08-25 02:26:07 +0000183 }
184
185 /* Parse the various sign options */
186 if (end-ptr >= 1 && is_sign_element(ptr[0])) {
187 format->sign = ptr[0];
Christian Heimesc3f30c42008-02-22 16:37:40 +0000188 ++ptr;
Eric Smith8c663262007-08-25 02:26:07 +0000189 }
190
Eric Smithd68af8f2008-07-16 00:15:35 +0000191 /* If the next character is #, we're in alternate mode. This only
192 applies to integers. */
193 if (end-ptr >= 1 && ptr[0] == '#') {
Eric Smithf8c8b6d2009-04-03 11:19:31 +0000194 format->alternate = 1;
195 ++ptr;
Eric Smithd68af8f2008-07-16 00:15:35 +0000196 }
197
Eric Smith8c663262007-08-25 02:26:07 +0000198 /* The special case for 0-padding (backwards compat) */
Eric Smith185e30c2007-08-30 22:23:08 +0000199 if (format->fill_char == '\0' && end-ptr >= 1 && ptr[0] == '0') {
Eric Smith8c663262007-08-25 02:26:07 +0000200 format->fill_char = '0';
201 if (format->align == '\0') {
202 format->align = '=';
203 }
Christian Heimesc3f30c42008-02-22 16:37:40 +0000204 ++ptr;
Eric Smith8c663262007-08-25 02:26:07 +0000205 }
206
Eric Smith0923d1d2009-04-16 20:16:10 +0000207 consumed = get_integer(&ptr, end, &format->width);
208 if (consumed == -1)
209 /* Overflow error. Exception already set. */
210 return 0;
Eric Smith8c663262007-08-25 02:26:07 +0000211
Eric Smith0923d1d2009-04-16 20:16:10 +0000212 /* If consumed is 0, we didn't consume any characters for the
213 width. In that case, reset the width to -1, because
214 get_integer() will have set it to zero. -1 is how we record
215 that the width wasn't specified. */
216 if (consumed == 0)
Eric Smith8c663262007-08-25 02:26:07 +0000217 format->width = -1;
Eric Smith8c663262007-08-25 02:26:07 +0000218
Eric Smitha3b1ac82009-04-03 14:45:06 +0000219 /* Comma signifies add thousands separators */
220 if (end-ptr && ptr[0] == ',') {
221 format->thousands_separators = 1;
222 ++ptr;
223 }
224
Eric Smith8c663262007-08-25 02:26:07 +0000225 /* Parse field precision */
226 if (end-ptr && ptr[0] == '.') {
Christian Heimesc3f30c42008-02-22 16:37:40 +0000227 ++ptr;
Eric Smith8c663262007-08-25 02:26:07 +0000228
Eric Smith0923d1d2009-04-16 20:16:10 +0000229 consumed = get_integer(&ptr, end, &format->precision);
230 if (consumed == -1)
231 /* Overflow error. Exception already set. */
232 return 0;
Eric Smith8c663262007-08-25 02:26:07 +0000233
Eric Smith0923d1d2009-04-16 20:16:10 +0000234 /* Not having a precision after a dot is an error. */
235 if (consumed == 0) {
Eric Smith8c663262007-08-25 02:26:07 +0000236 PyErr_Format(PyExc_ValueError,
237 "Format specifier missing precision");
238 return 0;
239 }
240
241 }
242
Eric Smith0923d1d2009-04-16 20:16:10 +0000243 /* Finally, parse the type field. */
Eric Smith8c663262007-08-25 02:26:07 +0000244
245 if (end-ptr > 1) {
Eric Smith0923d1d2009-04-16 20:16:10 +0000246 /* More than one char remain, invalid conversion spec. */
Eric Smith8c663262007-08-25 02:26:07 +0000247 PyErr_Format(PyExc_ValueError, "Invalid conversion specification");
248 return 0;
249 }
250
251 if (end-ptr == 1) {
252 format->type = ptr[0];
Christian Heimesc3f30c42008-02-22 16:37:40 +0000253 ++ptr;
Eric Smith8c663262007-08-25 02:26:07 +0000254 }
255
Eric Smith0923d1d2009-04-16 20:16:10 +0000256 /* Do as much validating as we can, just by looking at the format
257 specifier. Do not take into account what type of formatting
258 we're doing (int, float, string). */
259
260 if (format->thousands_separators) {
261 switch (format->type) {
262 case 'd':
263 case 'e':
264 case 'f':
265 case 'g':
266 case 'E':
267 case 'G':
268 case '%':
269 case 'F':
Eric Smith937491d2009-04-22 17:04:27 +0000270 case '\0':
Eric Smith0923d1d2009-04-16 20:16:10 +0000271 /* These are allowed. See PEP 378.*/
272 break;
273 default:
Eric Smithbeddd702009-07-30 13:43:08 +0000274 invalid_comma_type(format->type);
Eric Smith0923d1d2009-04-16 20:16:10 +0000275 return 0;
276 }
Eric Smitha3b1ac82009-04-03 14:45:06 +0000277 }
278
Eric Smith8c663262007-08-25 02:26:07 +0000279 return 1;
280}
281
Eric Smith58a42242009-04-30 01:00:33 +0000282/* Calculate the padding needed. */
283static void
284calc_padding(Py_ssize_t nchars, Py_ssize_t width, STRINGLIB_CHAR align,
285 Py_ssize_t *n_lpadding, Py_ssize_t *n_rpadding,
286 Py_ssize_t *n_total)
287{
288 if (width >= 0) {
289 if (nchars > width)
290 *n_total = nchars;
291 else
292 *n_total = width;
293 }
294 else {
295 /* not specified, use all of the chars and no more */
296 *n_total = nchars;
297 }
298
299 /* figure out how much leading space we need, based on the
300 aligning */
301 if (align == '>')
302 *n_lpadding = *n_total - nchars;
303 else if (align == '^')
304 *n_lpadding = (*n_total - nchars) / 2;
305 else
306 *n_lpadding = 0;
307
308 *n_rpadding = *n_total - nchars - *n_lpadding;
309}
310
311/* Do the padding, and return a pointer to where the caller-supplied
312 content goes. */
313static STRINGLIB_CHAR *
314fill_padding(STRINGLIB_CHAR *p, Py_ssize_t nchars, STRINGLIB_CHAR fill_char,
315 Py_ssize_t n_lpadding, Py_ssize_t n_rpadding)
316{
317 /* Pad on left. */
318 if (n_lpadding)
319 STRINGLIB_FILL(p, fill_char, n_lpadding);
320
321 /* Pad on right. */
322 if (n_rpadding)
323 STRINGLIB_FILL(p + nchars + n_lpadding, fill_char, n_rpadding);
324
325 /* Pointer to the user content. */
326 return p + n_lpadding;
327}
328
329#if defined FORMAT_FLOAT || defined FORMAT_LONG || defined FORMAT_COMPLEX
Eric Smith8c663262007-08-25 02:26:07 +0000330/************************************************************************/
331/*********** common routines for numeric formatting *********************/
332/************************************************************************/
333
/* Locale type codes, passed to get_locale_info(). */
#define LT_CURRENT_LOCALE 0     /* use the process's current locale */
#define LT_DEFAULT_LOCALE 1     /* hard-coded: '.' decimal, ',' every 3 */
#define LT_NO_LOCALE 2          /* '.' decimal, no grouping at all */

/* The locale-dependent strings needed to format an integer, or the
   part of a float up to and including the decimal point.  Note that
   locales only support 8-bit chars, not unicode. */
typedef struct {
    char *decimal_point;
    char *thousands_sep;
    char *grouping;
} LocaleInfo;
347
Eric Smith8c663262007-08-25 02:26:07 +0000348/* describes the layout for an integer, see the comment in
Eric Smithd68af8f2008-07-16 00:15:35 +0000349 calc_number_widths() for details */
Eric Smith8c663262007-08-25 02:26:07 +0000350typedef struct {
351 Py_ssize_t n_lpadding;
Eric Smithd68af8f2008-07-16 00:15:35 +0000352 Py_ssize_t n_prefix;
Eric Smith8c663262007-08-25 02:26:07 +0000353 Py_ssize_t n_spadding;
354 Py_ssize_t n_rpadding;
Eric Smith0923d1d2009-04-16 20:16:10 +0000355 char sign;
356 Py_ssize_t n_sign; /* number of digits needed for sign (0/1) */
357 Py_ssize_t n_grouped_digits; /* Space taken up by the digits, including
358 any grouping chars. */
359 Py_ssize_t n_decimal; /* 0 if only an integer */
360 Py_ssize_t n_remainder; /* Digits in decimal and/or exponent part,
361 excluding the decimal itself, if
362 present. */
363
364 /* These 2 are not the widths of fields, but are needed by
365 STRINGLIB_GROUPING. */
366 Py_ssize_t n_digits; /* The number of digits before a decimal
367 or exponent. */
368 Py_ssize_t n_min_width; /* The min_width we used when we computed
369 the n_grouped_digits width. */
Eric Smith8c663262007-08-25 02:26:07 +0000370} NumberFieldWidths;
371
Eric Smith58a42242009-04-30 01:00:33 +0000372
Eric Smith0923d1d2009-04-16 20:16:10 +0000373/* Given a number of the form:
374 digits[remainder]
375 where ptr points to the start and end points to the end, find where
376 the integer part ends. This could be a decimal, an exponent, both,
377 or neither.
378 If a decimal point is present, set *has_decimal and increment
379 remainder beyond it.
380 Results are undefined (but shouldn't crash) for improperly
381 formatted strings.
382*/
383static void
384parse_number(STRINGLIB_CHAR *ptr, Py_ssize_t len,
385 Py_ssize_t *n_remainder, int *has_decimal)
386{
387 STRINGLIB_CHAR *end = ptr + len;
388 STRINGLIB_CHAR *remainder;
389
390 while (ptr<end && isdigit(*ptr))
391 ++ptr;
392 remainder = ptr;
393
394 /* Does remainder start with a decimal point? */
395 *has_decimal = ptr<end && *remainder == '.';
396
397 /* Skip the decimal point. */
398 if (*has_decimal)
399 remainder++;
400
401 *n_remainder = end - remainder;
402}
403
Eric Smith8c663262007-08-25 02:26:07 +0000404/* not all fields of format are used. for example, precision is
405 unused. should this take discrete params in order to be more clear
406 about what it does? or is passing a single format parameter easier
407 and more efficient enough to justify a little obfuscation? */
Eric Smith0923d1d2009-04-16 20:16:10 +0000408static Py_ssize_t
409calc_number_widths(NumberFieldWidths *spec, Py_ssize_t n_prefix,
410 STRINGLIB_CHAR sign_char, STRINGLIB_CHAR *number,
411 Py_ssize_t n_number, Py_ssize_t n_remainder,
412 int has_decimal, const LocaleInfo *locale,
Eric Smithf8c8b6d2009-04-03 11:19:31 +0000413 const InternalFormatSpec *format)
Eric Smith8c663262007-08-25 02:26:07 +0000414{
Eric Smith0923d1d2009-04-16 20:16:10 +0000415 Py_ssize_t n_non_digit_non_padding;
416 Py_ssize_t n_padding;
417
418 spec->n_digits = n_number - n_remainder - (has_decimal?1:0);
Eric Smith05212a12008-07-16 19:41:14 +0000419 spec->n_lpadding = 0;
Eric Smith0923d1d2009-04-16 20:16:10 +0000420 spec->n_prefix = n_prefix;
421 spec->n_decimal = has_decimal ? strlen(locale->decimal_point) : 0;
422 spec->n_remainder = n_remainder;
Eric Smith05212a12008-07-16 19:41:14 +0000423 spec->n_spadding = 0;
424 spec->n_rpadding = 0;
Eric Smith0923d1d2009-04-16 20:16:10 +0000425 spec->sign = '\0';
426 spec->n_sign = 0;
Eric Smith8c663262007-08-25 02:26:07 +0000427
428 /* the output will look like:
Eric Smith0923d1d2009-04-16 20:16:10 +0000429 | |
430 | <lpadding> <sign> <prefix> <spadding> <grouped_digits> <decimal> <remainder> <rpadding> |
431 | |
Eric Smith8c663262007-08-25 02:26:07 +0000432
Eric Smith0923d1d2009-04-16 20:16:10 +0000433 sign is computed from format->sign and the actual
Eric Smith8c663262007-08-25 02:26:07 +0000434 sign of the number
435
Eric Smithb1ebcc62008-07-15 13:02:41 +0000436 prefix is given (it's for the '0x' prefix)
437
Eric Smith8c663262007-08-25 02:26:07 +0000438 digits is already known
439
440 the total width is either given, or computed from the
441 actual digits
442
443 only one of lpadding, spadding, and rpadding can be non-zero,
444 and it's calculated from the width and other fields
445 */
446
447 /* compute the various parts we're going to write */
Eric Smith0923d1d2009-04-16 20:16:10 +0000448 switch (format->sign) {
449 case '+':
Eric Smith8c663262007-08-25 02:26:07 +0000450 /* always put a + or - */
Eric Smith0923d1d2009-04-16 20:16:10 +0000451 spec->n_sign = 1;
452 spec->sign = (sign_char == '-' ? '-' : '+');
453 break;
454 case ' ':
455 spec->n_sign = 1;
456 spec->sign = (sign_char == '-' ? '-' : ' ');
457 break;
458 default:
459 /* Not specified, or the default (-) */
460 if (sign_char == '-') {
461 spec->n_sign = 1;
462 spec->sign = '-';
Eric Smith8c663262007-08-25 02:26:07 +0000463 }
464 }
465
Eric Smith0923d1d2009-04-16 20:16:10 +0000466 /* The number of chars used for non-digits and non-padding. */
467 n_non_digit_non_padding = spec->n_sign + spec->n_prefix + spec->n_decimal +
468 spec->n_remainder;
Eric Smithd68af8f2008-07-16 00:15:35 +0000469
Eric Smith0923d1d2009-04-16 20:16:10 +0000470 /* min_width can go negative, that's okay. format->width == -1 means
471 we don't care. */
472 if (format->fill_char == '0')
473 spec->n_min_width = format->width - n_non_digit_non_padding;
474 else
475 spec->n_min_width = 0;
476
477 if (spec->n_digits == 0)
478 /* This case only occurs when using 'c' formatting, we need
479 to special case it because the grouping code always wants
480 to have at least one character. */
481 spec->n_grouped_digits = 0;
482 else
483 spec->n_grouped_digits = STRINGLIB_GROUPING(NULL, 0, NULL,
484 spec->n_digits,
485 spec->n_min_width,
486 locale->grouping,
487 locale->thousands_sep);
488
489 /* Given the desired width and the total of digit and non-digit
490 space we consume, see if we need any padding. format->width can
491 be negative (meaning no padding), but this code still works in
492 that case. */
493 n_padding = format->width -
494 (n_non_digit_non_padding + spec->n_grouped_digits);
495 if (n_padding > 0) {
496 /* Some padding is needed. Determine if it's left, space, or right. */
497 switch (format->align) {
498 case '<':
499 spec->n_rpadding = n_padding;
500 break;
501 case '^':
502 spec->n_lpadding = n_padding / 2;
503 spec->n_rpadding = n_padding - spec->n_lpadding;
504 break;
505 case '=':
506 spec->n_spadding = n_padding;
507 break;
508 default:
509 /* Handles '>', plus catch-all just in case. */
510 spec->n_lpadding = n_padding;
511 break;
Eric Smith8c663262007-08-25 02:26:07 +0000512 }
513 }
Eric Smith0923d1d2009-04-16 20:16:10 +0000514 return spec->n_lpadding + spec->n_sign + spec->n_prefix +
515 spec->n_spadding + spec->n_grouped_digits + spec->n_decimal +
516 spec->n_remainder + spec->n_rpadding;
Eric Smith8c663262007-08-25 02:26:07 +0000517}
518
Eric Smith0923d1d2009-04-16 20:16:10 +0000519/* Fill in the digit parts of a numbers's string representation,
520 as determined in calc_number_widths().
521 No error checking, since we know the buffer is the correct size. */
522static void
523fill_number(STRINGLIB_CHAR *buf, const NumberFieldWidths *spec,
524 STRINGLIB_CHAR *digits, Py_ssize_t n_digits,
525 STRINGLIB_CHAR *prefix, STRINGLIB_CHAR fill_char,
526 LocaleInfo *locale, int toupper)
Eric Smith8c663262007-08-25 02:26:07 +0000527{
Eric Smith0923d1d2009-04-16 20:16:10 +0000528 /* Used to keep track of digits, decimal, and remainder. */
529 STRINGLIB_CHAR *p = digits;
530
531#ifndef NDEBUG
532 Py_ssize_t r;
533#endif
Eric Smith8c663262007-08-25 02:26:07 +0000534
535 if (spec->n_lpadding) {
Eric Smith0923d1d2009-04-16 20:16:10 +0000536 STRINGLIB_FILL(buf, fill_char, spec->n_lpadding);
537 buf += spec->n_lpadding;
Eric Smith8c663262007-08-25 02:26:07 +0000538 }
Eric Smith0923d1d2009-04-16 20:16:10 +0000539 if (spec->n_sign == 1) {
540 *buf++ = spec->sign;
Eric Smith8c663262007-08-25 02:26:07 +0000541 }
Eric Smithd68af8f2008-07-16 00:15:35 +0000542 if (spec->n_prefix) {
Eric Smith0923d1d2009-04-16 20:16:10 +0000543 memmove(buf,
Eric Smithf8c8b6d2009-04-03 11:19:31 +0000544 prefix,
545 spec->n_prefix * sizeof(STRINGLIB_CHAR));
Eric Smith0923d1d2009-04-16 20:16:10 +0000546 if (toupper) {
547 Py_ssize_t t;
548 for (t = 0; t < spec->n_prefix; ++t)
549 buf[t] = STRINGLIB_TOUPPER(buf[t]);
550 }
551 buf += spec->n_prefix;
Eric Smithd68af8f2008-07-16 00:15:35 +0000552 }
Eric Smith8c663262007-08-25 02:26:07 +0000553 if (spec->n_spadding) {
Eric Smith0923d1d2009-04-16 20:16:10 +0000554 STRINGLIB_FILL(buf, fill_char, spec->n_spadding);
555 buf += spec->n_spadding;
Eric Smith8c663262007-08-25 02:26:07 +0000556 }
Eric Smith0923d1d2009-04-16 20:16:10 +0000557
558 /* Only for type 'c' special case, it has no digits. */
559 if (spec->n_digits != 0) {
560 /* Fill the digits with InsertThousandsGrouping. */
561#ifndef NDEBUG
562 r =
563#endif
564 STRINGLIB_GROUPING(buf, spec->n_grouped_digits, digits,
565 spec->n_digits, spec->n_min_width,
566 locale->grouping, locale->thousands_sep);
567#ifndef NDEBUG
568 assert(r == spec->n_grouped_digits);
569#endif
570 p += spec->n_digits;
Eric Smith8c663262007-08-25 02:26:07 +0000571 }
Eric Smith0923d1d2009-04-16 20:16:10 +0000572 if (toupper) {
573 Py_ssize_t t;
574 for (t = 0; t < spec->n_grouped_digits; ++t)
575 buf[t] = STRINGLIB_TOUPPER(buf[t]);
576 }
577 buf += spec->n_grouped_digits;
578
579 if (spec->n_decimal) {
580 Py_ssize_t t;
581 for (t = 0; t < spec->n_decimal; ++t)
582 buf[t] = locale->decimal_point[t];
583 buf += spec->n_decimal;
584 p += 1;
585 }
586
587 if (spec->n_remainder) {
588 memcpy(buf, p, spec->n_remainder * sizeof(STRINGLIB_CHAR));
589 buf += spec->n_remainder;
590 p += spec->n_remainder;
591 }
592
Eric Smith8c663262007-08-25 02:26:07 +0000593 if (spec->n_rpadding) {
Eric Smith0923d1d2009-04-16 20:16:10 +0000594 STRINGLIB_FILL(buf, fill_char, spec->n_rpadding);
595 buf += spec->n_rpadding;
Eric Smith8c663262007-08-25 02:26:07 +0000596 }
Eric Smith8c663262007-08-25 02:26:07 +0000597}
Eric Smith0923d1d2009-04-16 20:16:10 +0000598
599static char no_grouping[1] = {CHAR_MAX};
600
601/* Find the decimal point character(s?), thousands_separator(s?), and
602 grouping description, either for the current locale if type is
603 LT_CURRENT_LOCALE, a hard-coded locale if LT_DEFAULT_LOCALE, or
604 none if LT_NO_LOCALE. */
605static void
606get_locale_info(int type, LocaleInfo *locale_info)
607{
608 switch (type) {
609 case LT_CURRENT_LOCALE: {
610 struct lconv *locale_data = localeconv();
611 locale_info->decimal_point = locale_data->decimal_point;
612 locale_info->thousands_sep = locale_data->thousands_sep;
613 locale_info->grouping = locale_data->grouping;
614 break;
615 }
616 case LT_DEFAULT_LOCALE:
617 locale_info->decimal_point = ".";
618 locale_info->thousands_sep = ",";
619 locale_info->grouping = "\3"; /* Group every 3 characters,
620 trailing 0 means repeat
621 infinitely. */
622 break;
623 case LT_NO_LOCALE:
624 locale_info->decimal_point = ".";
625 locale_info->thousands_sep = "";
626 locale_info->grouping = no_grouping;
627 break;
628 default:
629 assert(0);
630 }
631}
632
Eric Smith58a42242009-04-30 01:00:33 +0000633#endif /* FORMAT_FLOAT || FORMAT_LONG || FORMAT_COMPLEX */
Eric Smith8c663262007-08-25 02:26:07 +0000634
635/************************************************************************/
636/*********** string formatting ******************************************/
637/************************************************************************/
638
639static PyObject *
640format_string_internal(PyObject *value, const InternalFormatSpec *format)
641{
Eric Smith8c663262007-08-25 02:26:07 +0000642 Py_ssize_t lpad;
Eric Smith58a42242009-04-30 01:00:33 +0000643 Py_ssize_t rpad;
644 Py_ssize_t total;
645 STRINGLIB_CHAR *p;
Eric Smith8c663262007-08-25 02:26:07 +0000646 Py_ssize_t len = STRINGLIB_LEN(value);
647 PyObject *result = NULL;
648
649 /* sign is not allowed on strings */
650 if (format->sign != '\0') {
651 PyErr_SetString(PyExc_ValueError,
652 "Sign not allowed in string format specifier");
653 goto done;
654 }
655
Eric Smithb1ebcc62008-07-15 13:02:41 +0000656 /* alternate is not allowed on strings */
657 if (format->alternate) {
658 PyErr_SetString(PyExc_ValueError,
659 "Alternate form (#) not allowed in string format "
Eric Smithf8c8b6d2009-04-03 11:19:31 +0000660 "specifier");
Eric Smithb1ebcc62008-07-15 13:02:41 +0000661 goto done;
662 }
663
Eric Smith8c663262007-08-25 02:26:07 +0000664 /* '=' alignment not allowed on strings */
665 if (format->align == '=') {
666 PyErr_SetString(PyExc_ValueError,
667 "'=' alignment not allowed "
668 "in string format specifier");
669 goto done;
670 }
671
672 /* if precision is specified, output no more that format.precision
673 characters */
674 if (format->precision >= 0 && len >= format->precision) {
675 len = format->precision;
676 }
677
Eric Smith58a42242009-04-30 01:00:33 +0000678 calc_padding(len, format->width, format->align, &lpad, &rpad, &total);
Eric Smith8c663262007-08-25 02:26:07 +0000679
680 /* allocate the resulting string */
Eric Smith58a42242009-04-30 01:00:33 +0000681 result = STRINGLIB_NEW(NULL, total);
Eric Smith8c663262007-08-25 02:26:07 +0000682 if (result == NULL)
683 goto done;
684
Eric Smith58a42242009-04-30 01:00:33 +0000685 /* Write into that space. First the padding. */
686 p = fill_padding(STRINGLIB_STR(result), len,
687 format->fill_char=='\0'?' ':format->fill_char,
688 lpad, rpad);
Eric Smith8c663262007-08-25 02:26:07 +0000689
Eric Smith58a42242009-04-30 01:00:33 +0000690 /* Then the source string. */
691 memcpy(p, STRINGLIB_STR(value), len * sizeof(STRINGLIB_CHAR));
Eric Smith8c663262007-08-25 02:26:07 +0000692
693done:
694 return result;
695}
696
697
698/************************************************************************/
699/*********** long formatting ********************************************/
700/************************************************************************/
701
Eric Smith8fd3eba2008-02-17 19:48:00 +0000702#if defined FORMAT_LONG || defined FORMAT_INT
703typedef PyObject*
704(*IntOrLongToString)(PyObject *value, int base);
705
Eric Smith8c663262007-08-25 02:26:07 +0000706static PyObject *
Eric Smith8fd3eba2008-02-17 19:48:00 +0000707format_int_or_long_internal(PyObject *value, const InternalFormatSpec *format,
Eric Smithf8c8b6d2009-04-03 11:19:31 +0000708 IntOrLongToString tostring)
Eric Smith8c663262007-08-25 02:26:07 +0000709{
710 PyObject *result = NULL;
Eric Smith8fd3eba2008-02-17 19:48:00 +0000711 PyObject *tmp = NULL;
712 STRINGLIB_CHAR *pnumeric_chars;
713 STRINGLIB_CHAR numeric_char;
Eric Smith0923d1d2009-04-16 20:16:10 +0000714 STRINGLIB_CHAR sign_char = '\0';
Eric Smith8c663262007-08-25 02:26:07 +0000715 Py_ssize_t n_digits; /* count of digits need from the computed
716 string */
Eric Smith0923d1d2009-04-16 20:16:10 +0000717 Py_ssize_t n_remainder = 0; /* Used only for 'c' formatting, which
718 produces non-digits */
Eric Smithd68af8f2008-07-16 00:15:35 +0000719 Py_ssize_t n_prefix = 0; /* Count of prefix chars, (e.g., '0x') */
Eric Smith0923d1d2009-04-16 20:16:10 +0000720 Py_ssize_t n_total;
Eric Smithd68af8f2008-07-16 00:15:35 +0000721 STRINGLIB_CHAR *prefix = NULL;
Eric Smith8c663262007-08-25 02:26:07 +0000722 NumberFieldWidths spec;
723 long x;
724
Eric Smith0923d1d2009-04-16 20:16:10 +0000725 /* Locale settings, either from the actual locale or
726 from a hard-code pseudo-locale */
727 LocaleInfo locale;
728
Eric Smith8c663262007-08-25 02:26:07 +0000729 /* no precision allowed on integers */
730 if (format->precision != -1) {
731 PyErr_SetString(PyExc_ValueError,
732 "Precision not allowed in integer format specifier");
733 goto done;
734 }
735
Eric Smith8c663262007-08-25 02:26:07 +0000736 /* special case for character formatting */
737 if (format->type == 'c') {
738 /* error to specify a sign */
739 if (format->sign != '\0') {
740 PyErr_SetString(PyExc_ValueError,
741 "Sign not allowed with integer"
742 " format specifier 'c'");
743 goto done;
744 }
745
Eric Smith0923d1d2009-04-16 20:16:10 +0000746 /* Error to specify a comma. */
747 if (format->thousands_separators) {
748 PyErr_SetString(PyExc_ValueError,
749 "Thousands separators not allowed with integer"
750 " format specifier 'c'");
751 goto done;
752 }
753
Eric Smith8c663262007-08-25 02:26:07 +0000754 /* taken from unicodeobject.c formatchar() */
755 /* Integer input truncated to a character */
Eric Smith8fd3eba2008-02-17 19:48:00 +0000756/* XXX: won't work for int */
Christian Heimes217cfd12007-12-02 14:31:20 +0000757 x = PyLong_AsLong(value);
Eric Smith8c663262007-08-25 02:26:07 +0000758 if (x == -1 && PyErr_Occurred())
759 goto done;
760#ifdef Py_UNICODE_WIDE
761 if (x < 0 || x > 0x10ffff) {
762 PyErr_SetString(PyExc_OverflowError,
763 "%c arg not in range(0x110000) "
764 "(wide Python build)");
765 goto done;
766 }
767#else
768 if (x < 0 || x > 0xffff) {
769 PyErr_SetString(PyExc_OverflowError,
770 "%c arg not in range(0x10000) "
771 "(narrow Python build)");
772 goto done;
773 }
774#endif
Eric Smithf8c8b6d2009-04-03 11:19:31 +0000775 numeric_char = (STRINGLIB_CHAR)x;
776 pnumeric_chars = &numeric_char;
Eric Smith8fd3eba2008-02-17 19:48:00 +0000777 n_digits = 1;
Eric Smith0923d1d2009-04-16 20:16:10 +0000778
779 /* As a sort-of hack, we tell calc_number_widths that we only
780 have "remainder" characters. calc_number_widths thinks
781 these are characters that don't get formatted, only copied
782 into the output string. We do this for 'c' formatting,
783 because the characters are likely to be non-digits. */
784 n_remainder = 1;
Eric Smith0cb431c2007-08-28 01:07:27 +0000785 }
786 else {
Eric Smith8c663262007-08-25 02:26:07 +0000787 int base;
Eric Smithf8c8b6d2009-04-03 11:19:31 +0000788 int leading_chars_to_skip = 0; /* Number of characters added by
789 PyNumber_ToBase that we want to
790 skip over. */
Eric Smith8fd3eba2008-02-17 19:48:00 +0000791
792 /* Compute the base and how many characters will be added by
Eric Smith8c663262007-08-25 02:26:07 +0000793 PyNumber_ToBase */
794 switch (format->type) {
795 case 'b':
796 base = 2;
Eric Smithf8c8b6d2009-04-03 11:19:31 +0000797 leading_chars_to_skip = 2; /* 0b */
Eric Smith8c663262007-08-25 02:26:07 +0000798 break;
799 case 'o':
800 base = 8;
Eric Smithf8c8b6d2009-04-03 11:19:31 +0000801 leading_chars_to_skip = 2; /* 0o */
Eric Smith8c663262007-08-25 02:26:07 +0000802 break;
803 case 'x':
804 case 'X':
805 base = 16;
Eric Smithf8c8b6d2009-04-03 11:19:31 +0000806 leading_chars_to_skip = 2; /* 0x */
Eric Smith8c663262007-08-25 02:26:07 +0000807 break;
808 default: /* shouldn't be needed, but stops a compiler warning */
809 case 'd':
Eric Smith5807c412008-05-11 21:00:57 +0000810 case 'n':
Eric Smith8c663262007-08-25 02:26:07 +0000811 base = 10;
Eric Smith8c663262007-08-25 02:26:07 +0000812 break;
813 }
814
Eric Smithf8c8b6d2009-04-03 11:19:31 +0000815 /* The number of prefix chars is the same as the leading
816 chars to skip */
817 if (format->alternate)
818 n_prefix = leading_chars_to_skip;
Eric Smithd68af8f2008-07-16 00:15:35 +0000819
Eric Smith8fd3eba2008-02-17 19:48:00 +0000820 /* Do the hard part, converting to a string in a given base */
Eric Smithf8c8b6d2009-04-03 11:19:31 +0000821 tmp = tostring(value, base);
Eric Smith8fd3eba2008-02-17 19:48:00 +0000822 if (tmp == NULL)
Eric Smith8c663262007-08-25 02:26:07 +0000823 goto done;
824
Eric Smithf8c8b6d2009-04-03 11:19:31 +0000825 pnumeric_chars = STRINGLIB_STR(tmp);
Eric Smith8fd3eba2008-02-17 19:48:00 +0000826 n_digits = STRINGLIB_LEN(tmp);
Eric Smith8c663262007-08-25 02:26:07 +0000827
Eric Smithf8c8b6d2009-04-03 11:19:31 +0000828 prefix = pnumeric_chars;
Eric Smithd68af8f2008-07-16 00:15:35 +0000829
Eric Smithf8c8b6d2009-04-03 11:19:31 +0000830 /* Remember not to modify what pnumeric_chars points to. it
831 might be interned. Only modify it after we copy it into a
832 newly allocated output buffer. */
Eric Smith8c663262007-08-25 02:26:07 +0000833
Eric Smith8fd3eba2008-02-17 19:48:00 +0000834 /* Is a sign character present in the output? If so, remember it
Eric Smith8c663262007-08-25 02:26:07 +0000835 and skip it */
Eric Smith0923d1d2009-04-16 20:16:10 +0000836 if (pnumeric_chars[0] == '-') {
837 sign_char = pnumeric_chars[0];
Eric Smithf8c8b6d2009-04-03 11:19:31 +0000838 ++prefix;
839 ++leading_chars_to_skip;
Eric Smith8c663262007-08-25 02:26:07 +0000840 }
841
Eric Smithf8c8b6d2009-04-03 11:19:31 +0000842 /* Skip over the leading chars (0x, 0b, etc.) */
843 n_digits -= leading_chars_to_skip;
844 pnumeric_chars += leading_chars_to_skip;
Eric Smith8c663262007-08-25 02:26:07 +0000845 }
846
Eric Smith0923d1d2009-04-16 20:16:10 +0000847 /* Determine the grouping, separator, and decimal point, if any. */
848 get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
849 (format->thousands_separators ?
850 LT_DEFAULT_LOCALE :
851 LT_NO_LOCALE),
852 &locale);
Eric Smith5807c412008-05-11 21:00:57 +0000853
Eric Smith0923d1d2009-04-16 20:16:10 +0000854 /* Calculate how much memory we'll need. */
855 n_total = calc_number_widths(&spec, n_prefix, sign_char, pnumeric_chars,
856 n_digits, n_remainder, 0, &locale, format);
Eric Smithb151a452008-06-24 11:21:04 +0000857
Eric Smith0923d1d2009-04-16 20:16:10 +0000858 /* Allocate the memory. */
859 result = STRINGLIB_NEW(NULL, n_total);
Eric Smith8fd3eba2008-02-17 19:48:00 +0000860 if (!result)
Eric Smithf8c8b6d2009-04-03 11:19:31 +0000861 goto done;
Eric Smith8c663262007-08-25 02:26:07 +0000862
Eric Smith0923d1d2009-04-16 20:16:10 +0000863 /* Populate the memory. */
864 fill_number(STRINGLIB_STR(result), &spec, pnumeric_chars, n_digits,
865 prefix, format->fill_char == '\0' ? ' ' : format->fill_char,
866 &locale, format->type == 'X');
Eric Smithd68af8f2008-07-16 00:15:35 +0000867
Eric Smith8c663262007-08-25 02:26:07 +0000868done:
Eric Smith8fd3eba2008-02-17 19:48:00 +0000869 Py_XDECREF(tmp);
Eric Smith8c663262007-08-25 02:26:07 +0000870 return result;
871}
Eric Smith8fd3eba2008-02-17 19:48:00 +0000872#endif /* defined FORMAT_LONG || defined FORMAT_INT */
Eric Smith8c663262007-08-25 02:26:07 +0000873
874/************************************************************************/
875/*********** float formatting *******************************************/
876/************************************************************************/
877
Eric Smith8fd3eba2008-02-17 19:48:00 +0000878#ifdef FORMAT_FLOAT
879#if STRINGLIB_IS_UNICODE
Eric Smith0923d1d2009-04-16 20:16:10 +0000880static void
881strtounicode(Py_UNICODE *buffer, const char *charbuffer, Py_ssize_t len)
Eric Smith8c663262007-08-25 02:26:07 +0000882{
Eric Smith0923d1d2009-04-16 20:16:10 +0000883 Py_ssize_t i;
884 for (i = 0; i < len; ++i)
885 buffer[i] = (Py_UNICODE)charbuffer[i];
Eric Smith8c663262007-08-25 02:26:07 +0000886}
Eric Smith8fd3eba2008-02-17 19:48:00 +0000887#endif
Eric Smith8c663262007-08-25 02:26:07 +0000888
Eric Smith8c663262007-08-25 02:26:07 +0000889/* much of this is taken from unicodeobject.c */
Eric Smith8c663262007-08-25 02:26:07 +0000890static PyObject *
Christian Heimesc3f30c42008-02-22 16:37:40 +0000891format_float_internal(PyObject *value,
Eric Smithf8c8b6d2009-04-03 11:19:31 +0000892 const InternalFormatSpec *format)
Eric Smith8c663262007-08-25 02:26:07 +0000893{
Eric Smith0923d1d2009-04-16 20:16:10 +0000894 char *buf = NULL; /* buffer returned from PyOS_double_to_string */
Eric Smith8c663262007-08-25 02:26:07 +0000895 Py_ssize_t n_digits;
Eric Smith0923d1d2009-04-16 20:16:10 +0000896 Py_ssize_t n_remainder;
897 Py_ssize_t n_total;
898 int has_decimal;
899 double val;
Eric Smith8c663262007-08-25 02:26:07 +0000900 Py_ssize_t precision = format->precision;
Eric Smith63376222009-05-05 14:04:18 +0000901 Py_ssize_t default_precision = 6;
Eric Smith0923d1d2009-04-16 20:16:10 +0000902 STRINGLIB_CHAR type = format->type;
903 int add_pct = 0;
Eric Smith8c663262007-08-25 02:26:07 +0000904 STRINGLIB_CHAR *p;
905 NumberFieldWidths spec;
Eric Smith0923d1d2009-04-16 20:16:10 +0000906 int flags = 0;
907 PyObject *result = NULL;
908 STRINGLIB_CHAR sign_char = '\0';
909 int float_type; /* Used to see if we have a nan, inf, or regular float. */
Eric Smith8c663262007-08-25 02:26:07 +0000910
911#if STRINGLIB_IS_UNICODE
Eric Smith0923d1d2009-04-16 20:16:10 +0000912 Py_UNICODE *unicode_tmp = NULL;
Eric Smith8c663262007-08-25 02:26:07 +0000913#endif
914
Eric Smith0923d1d2009-04-16 20:16:10 +0000915 /* Locale settings, either from the actual locale or
916 from a hard-code pseudo-locale */
917 LocaleInfo locale;
918
919 /* Alternate is not allowed on floats. */
Eric Smithb1ebcc62008-07-15 13:02:41 +0000920 if (format->alternate) {
921 PyErr_SetString(PyExc_ValueError,
922 "Alternate form (#) not allowed in float format "
Eric Smithf8c8b6d2009-04-03 11:19:31 +0000923 "specifier");
Eric Smithb1ebcc62008-07-15 13:02:41 +0000924 goto done;
925 }
926
Eric Smith0923d1d2009-04-16 20:16:10 +0000927 if (type == '\0') {
Eric Smith63376222009-05-05 14:04:18 +0000928 /* Omitted type specifier. This is like 'g' but with at least one
929 digit after the decimal point, and different default precision.*/
Eric Smith0923d1d2009-04-16 20:16:10 +0000930 type = 'g';
Eric Smith63376222009-05-05 14:04:18 +0000931 default_precision = PyFloat_STR_PRECISION;
Eric Smith0923d1d2009-04-16 20:16:10 +0000932 flags |= Py_DTSF_ADD_DOT_0;
933 }
934
935 if (type == 'n')
936 /* 'n' is the same as 'g', except for the locale used to
937 format the result. We take care of that later. */
938 type = 'g';
Eric Smith8c663262007-08-25 02:26:07 +0000939
Eric Smith0923d1d2009-04-16 20:16:10 +0000940 val = PyFloat_AsDouble(value);
941 if (val == -1.0 && PyErr_Occurred())
Eric Smith185e30c2007-08-30 22:23:08 +0000942 goto done;
Eric Smith8c663262007-08-25 02:26:07 +0000943
944 if (type == '%') {
945 type = 'f';
Eric Smith0923d1d2009-04-16 20:16:10 +0000946 val *= 100;
947 add_pct = 1;
Eric Smith8c663262007-08-25 02:26:07 +0000948 }
949
950 if (precision < 0)
Eric Smith63376222009-05-05 14:04:18 +0000951 precision = default_precision;
Eric Smith8c663262007-08-25 02:26:07 +0000952
Eric Smith0923d1d2009-04-16 20:16:10 +0000953 /* Cast "type", because if we're in unicode we need to pass a
954 8-bit char. This is safe, because we've restricted what "type"
955 can be. */
956 buf = PyOS_double_to_string(val, (char)type, precision, flags,
957 &float_type);
958 if (buf == NULL)
959 goto done;
960 n_digits = strlen(buf);
Eric Smith8c663262007-08-25 02:26:07 +0000961
Eric Smith0923d1d2009-04-16 20:16:10 +0000962 if (add_pct) {
963 /* We know that buf has a trailing zero (since we just called
964 strlen() on it), and we don't use that fact any more. So we
965 can just write over the trailing zero. */
966 buf[n_digits] = '%';
967 n_digits += 1;
968 }
Eric Smith8c663262007-08-25 02:26:07 +0000969
Eric Smith0923d1d2009-04-16 20:16:10 +0000970 /* Since there is no unicode version of PyOS_double_to_string,
971 just use the 8 bit version and then convert to unicode. */
Eric Smith8c663262007-08-25 02:26:07 +0000972#if STRINGLIB_IS_UNICODE
Eric Smith0923d1d2009-04-16 20:16:10 +0000973 unicode_tmp = (Py_UNICODE*)PyMem_Malloc((n_digits)*sizeof(Py_UNICODE));
974 if (unicode_tmp == NULL) {
975 PyErr_NoMemory();
976 goto done;
977 }
978 strtounicode(unicode_tmp, buf, n_digits);
979 p = unicode_tmp;
Eric Smith8c663262007-08-25 02:26:07 +0000980#else
Eric Smith0923d1d2009-04-16 20:16:10 +0000981 p = buf;
Eric Smith8c663262007-08-25 02:26:07 +0000982#endif
983
Eric Smith0923d1d2009-04-16 20:16:10 +0000984 /* Is a sign character present in the output? If so, remember it
Eric Smith8c663262007-08-25 02:26:07 +0000985 and skip it */
Eric Smith0923d1d2009-04-16 20:16:10 +0000986 if (*p == '-') {
987 sign_char = *p;
Christian Heimesc3f30c42008-02-22 16:37:40 +0000988 ++p;
989 --n_digits;
Eric Smith8c663262007-08-25 02:26:07 +0000990 }
991
Eric Smith0923d1d2009-04-16 20:16:10 +0000992 /* Determine if we have any "remainder" (after the digits, might include
993 decimal or exponent or both (or neither)) */
994 parse_number(p, n_digits, &n_remainder, &has_decimal);
Eric Smith8c663262007-08-25 02:26:07 +0000995
Eric Smith0923d1d2009-04-16 20:16:10 +0000996 /* Determine the grouping, separator, and decimal point, if any. */
997 get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
998 (format->thousands_separators ?
999 LT_DEFAULT_LOCALE :
1000 LT_NO_LOCALE),
1001 &locale);
1002
1003 /* Calculate how much memory we'll need. */
1004 n_total = calc_number_widths(&spec, 0, sign_char, p, n_digits,
1005 n_remainder, has_decimal, &locale, format);
1006
1007 /* Allocate the memory. */
1008 result = STRINGLIB_NEW(NULL, n_total);
Eric Smith8c663262007-08-25 02:26:07 +00001009 if (result == NULL)
1010 goto done;
1011
Eric Smith0923d1d2009-04-16 20:16:10 +00001012 /* Populate the memory. */
1013 fill_number(STRINGLIB_STR(result), &spec, p, n_digits, NULL,
1014 format->fill_char == '\0' ? ' ' : format->fill_char, &locale,
1015 0);
Eric Smith8c663262007-08-25 02:26:07 +00001016
1017done:
Eric Smith0923d1d2009-04-16 20:16:10 +00001018 PyMem_Free(buf);
1019#if STRINGLIB_IS_UNICODE
1020 PyMem_Free(unicode_tmp);
1021#endif
Eric Smith8c663262007-08-25 02:26:07 +00001022 return result;
1023}
Eric Smith8fd3eba2008-02-17 19:48:00 +00001024#endif /* FORMAT_FLOAT */
Eric Smith8c663262007-08-25 02:26:07 +00001025
1026/************************************************************************/
Eric Smith58a42242009-04-30 01:00:33 +00001027/*********** complex formatting *****************************************/
1028/************************************************************************/
1029
1030#ifdef FORMAT_COMPLEX
1031
1032static PyObject *
1033format_complex_internal(PyObject *value,
1034 const InternalFormatSpec *format)
1035{
1036 double re;
1037 double im;
1038 char *re_buf = NULL; /* buffer returned from PyOS_double_to_string */
1039 char *im_buf = NULL; /* buffer returned from PyOS_double_to_string */
1040
1041 InternalFormatSpec tmp_format = *format;
1042 Py_ssize_t n_re_digits;
1043 Py_ssize_t n_im_digits;
1044 Py_ssize_t n_re_remainder;
1045 Py_ssize_t n_im_remainder;
1046 Py_ssize_t n_re_total;
1047 Py_ssize_t n_im_total;
1048 int re_has_decimal;
1049 int im_has_decimal;
1050 Py_ssize_t precision = format->precision;
Eric Smith63376222009-05-05 14:04:18 +00001051 Py_ssize_t default_precision = 6;
Eric Smith58a42242009-04-30 01:00:33 +00001052 STRINGLIB_CHAR type = format->type;
1053 STRINGLIB_CHAR *p_re;
1054 STRINGLIB_CHAR *p_im;
1055 NumberFieldWidths re_spec;
1056 NumberFieldWidths im_spec;
1057 int flags = 0;
1058 PyObject *result = NULL;
1059 STRINGLIB_CHAR *p;
1060 STRINGLIB_CHAR re_sign_char = '\0';
1061 STRINGLIB_CHAR im_sign_char = '\0';
1062 int re_float_type; /* Used to see if we have a nan, inf, or regular float. */
1063 int im_float_type;
1064 int add_parens = 0;
1065 int skip_re = 0;
1066 Py_ssize_t lpad;
1067 Py_ssize_t rpad;
1068 Py_ssize_t total;
1069
1070#if STRINGLIB_IS_UNICODE
1071 Py_UNICODE *re_unicode_tmp = NULL;
1072 Py_UNICODE *im_unicode_tmp = NULL;
1073#endif
1074
1075 /* Locale settings, either from the actual locale or
1076 from a hard-code pseudo-locale */
1077 LocaleInfo locale;
1078
1079 /* Alternate is not allowed on complex. */
1080 if (format->alternate) {
1081 PyErr_SetString(PyExc_ValueError,
1082 "Alternate form (#) not allowed in complex format "
1083 "specifier");
1084 goto done;
1085 }
1086
1087 /* Neither is zero pading. */
1088 if (format->fill_char == '0') {
1089 PyErr_SetString(PyExc_ValueError,
1090 "Zero padding is not allowed in complex format "
1091 "specifier");
1092 goto done;
1093 }
1094
1095 /* Neither is '=' alignment . */
1096 if (format->align == '=') {
1097 PyErr_SetString(PyExc_ValueError,
1098 "'=' alignment flag is not allowed in complex format "
1099 "specifier");
1100 goto done;
1101 }
1102
1103 re = PyComplex_RealAsDouble(value);
1104 if (re == -1.0 && PyErr_Occurred())
1105 goto done;
1106 im = PyComplex_ImagAsDouble(value);
1107 if (im == -1.0 && PyErr_Occurred())
1108 goto done;
1109
1110 if (type == '\0') {
1111 /* Omitted type specifier. Should be like str(self). */
1112 type = 'g';
Eric Smith63376222009-05-05 14:04:18 +00001113 default_precision = PyFloat_STR_PRECISION;
Eric Smith58a42242009-04-30 01:00:33 +00001114 add_parens = 1;
1115 if (re == 0.0)
1116 skip_re = 1;
1117 }
1118
1119 if (type == 'n')
1120 /* 'n' is the same as 'g', except for the locale used to
1121 format the result. We take care of that later. */
1122 type = 'g';
1123
Eric Smith58a42242009-04-30 01:00:33 +00001124 if (precision < 0)
Eric Smith63376222009-05-05 14:04:18 +00001125 precision = default_precision;
Eric Smith58a42242009-04-30 01:00:33 +00001126
1127 /* Cast "type", because if we're in unicode we need to pass a
1128 8-bit char. This is safe, because we've restricted what "type"
1129 can be. */
1130 re_buf = PyOS_double_to_string(re, (char)type, precision, flags,
1131 &re_float_type);
1132 if (re_buf == NULL)
1133 goto done;
1134 im_buf = PyOS_double_to_string(im, (char)type, precision, flags,
1135 &im_float_type);
1136 if (im_buf == NULL)
1137 goto done;
1138
1139 n_re_digits = strlen(re_buf);
1140 n_im_digits = strlen(im_buf);
1141
1142 /* Since there is no unicode version of PyOS_double_to_string,
1143 just use the 8 bit version and then convert to unicode. */
1144#if STRINGLIB_IS_UNICODE
1145 re_unicode_tmp = (Py_UNICODE*)PyMem_Malloc((n_re_digits)*sizeof(Py_UNICODE));
1146 if (re_unicode_tmp == NULL) {
1147 PyErr_NoMemory();
1148 goto done;
1149 }
1150 strtounicode(re_unicode_tmp, re_buf, n_re_digits);
1151 p_re = re_unicode_tmp;
1152
1153 im_unicode_tmp = (Py_UNICODE*)PyMem_Malloc((n_im_digits)*sizeof(Py_UNICODE));
1154 if (im_unicode_tmp == NULL) {
1155 PyErr_NoMemory();
1156 goto done;
1157 }
1158 strtounicode(im_unicode_tmp, im_buf, n_im_digits);
1159 p_im = im_unicode_tmp;
1160#else
1161 p_re = re_buf;
1162 p_im = im_buf;
1163#endif
1164
1165 /* Is a sign character present in the output? If so, remember it
1166 and skip it */
1167 if (*p_re == '-') {
1168 re_sign_char = *p_re;
1169 ++p_re;
1170 --n_re_digits;
1171 }
1172 if (*p_im == '-') {
1173 im_sign_char = *p_im;
1174 ++p_im;
1175 --n_im_digits;
1176 }
1177
1178 /* Determine if we have any "remainder" (after the digits, might include
1179 decimal or exponent or both (or neither)) */
1180 parse_number(p_re, n_re_digits, &n_re_remainder, &re_has_decimal);
1181 parse_number(p_im, n_im_digits, &n_im_remainder, &im_has_decimal);
1182
1183 /* Determine the grouping, separator, and decimal point, if any. */
1184 get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
1185 (format->thousands_separators ?
1186 LT_DEFAULT_LOCALE :
1187 LT_NO_LOCALE),
1188 &locale);
1189
1190 /* Turn off any padding. We'll do it later after we've composed
1191 the numbers without padding. */
1192 tmp_format.fill_char = '\0';
1193 tmp_format.align = '\0';
1194 tmp_format.width = -1;
1195
1196 /* Calculate how much memory we'll need. */
1197 n_re_total = calc_number_widths(&re_spec, 0, re_sign_char, p_re,
1198 n_re_digits, n_re_remainder,
1199 re_has_decimal, &locale, &tmp_format);
1200
1201 /* Same formatting, but always include a sign. */
1202 tmp_format.sign = '+';
1203 n_im_total = calc_number_widths(&im_spec, 0, im_sign_char, p_im,
1204 n_im_digits, n_im_remainder,
1205 im_has_decimal, &locale, &tmp_format);
1206
1207 if (skip_re)
1208 n_re_total = 0;
1209
1210 /* Add 1 for the 'j', and optionally 2 for parens. */
1211 calc_padding(n_re_total + n_im_total + 1 + add_parens * 2,
1212 format->width, format->align, &lpad, &rpad, &total);
1213
1214 result = STRINGLIB_NEW(NULL, total);
1215 if (result == NULL)
1216 goto done;
1217
1218 /* Populate the memory. First, the padding. */
1219 p = fill_padding(STRINGLIB_STR(result),
1220 n_re_total + n_im_total + 1 + add_parens * 2,
1221 format->fill_char=='\0' ? ' ' : format->fill_char,
1222 lpad, rpad);
1223
1224 if (add_parens)
1225 *p++ = '(';
1226
1227 if (!skip_re) {
1228 fill_number(p, &re_spec, p_re, n_re_digits, NULL, 0, &locale, 0);
1229 p += n_re_total;
1230 }
1231 fill_number(p, &im_spec, p_im, n_im_digits, NULL, 0, &locale, 0);
1232 p += n_im_total;
1233 *p++ = 'j';
1234
1235 if (add_parens)
1236 *p++ = ')';
1237
1238done:
1239 PyMem_Free(re_buf);
1240 PyMem_Free(im_buf);
1241#if STRINGLIB_IS_UNICODE
1242 PyMem_Free(re_unicode_tmp);
1243 PyMem_Free(im_unicode_tmp);
1244#endif
1245 return result;
1246}
1247#endif /* FORMAT_COMPLEX */
1248
1249/************************************************************************/
Eric Smith8c663262007-08-25 02:26:07 +00001250/*********** built in formatters ****************************************/
1251/************************************************************************/
Eric Smith8c663262007-08-25 02:26:07 +00001252PyObject *
Eric Smith4a7d76d2008-05-30 18:10:19 +00001253FORMAT_STRING(PyObject *obj,
Eric Smithf8c8b6d2009-04-03 11:19:31 +00001254 STRINGLIB_CHAR *format_spec,
1255 Py_ssize_t format_spec_len)
Eric Smith8c663262007-08-25 02:26:07 +00001256{
Eric Smith8c663262007-08-25 02:26:07 +00001257 InternalFormatSpec format;
Eric Smith4a7d76d2008-05-30 18:10:19 +00001258 PyObject *result = NULL;
Eric Smith8c663262007-08-25 02:26:07 +00001259
1260 /* check for the special case of zero length format spec, make
Eric Smith4a7d76d2008-05-30 18:10:19 +00001261 it equivalent to str(obj) */
1262 if (format_spec_len == 0) {
1263 result = STRINGLIB_TOSTR(obj);
Eric Smith8c663262007-08-25 02:26:07 +00001264 goto done;
1265 }
1266
1267 /* parse the format_spec */
Eric Smith4a7d76d2008-05-30 18:10:19 +00001268 if (!parse_internal_render_format_spec(format_spec, format_spec_len,
Eric Smithf8c8b6d2009-04-03 11:19:31 +00001269 &format, 's'))
Eric Smith8c663262007-08-25 02:26:07 +00001270 goto done;
1271
1272 /* type conversion? */
1273 switch (format.type) {
1274 case 's':
1275 /* no type conversion needed, already a string. do the formatting */
Eric Smith4a7d76d2008-05-30 18:10:19 +00001276 result = format_string_internal(obj, &format);
Eric Smith8c663262007-08-25 02:26:07 +00001277 break;
Eric Smith8c663262007-08-25 02:26:07 +00001278 default:
1279 /* unknown */
Eric Smith5e5c0db2009-02-20 14:25:03 +00001280 unknown_presentation_type(format.type, obj->ob_type->tp_name);
Eric Smith8c663262007-08-25 02:26:07 +00001281 goto done;
1282 }
1283
1284done:
Eric Smith8c663262007-08-25 02:26:07 +00001285 return result;
1286}
1287
Eric Smith8fd3eba2008-02-17 19:48:00 +00001288#if defined FORMAT_LONG || defined FORMAT_INT
1289static PyObject*
Eric Smith4a7d76d2008-05-30 18:10:19 +00001290format_int_or_long(PyObject* obj,
Eric Smithf8c8b6d2009-04-03 11:19:31 +00001291 STRINGLIB_CHAR *format_spec,
1292 Py_ssize_t format_spec_len,
1293 IntOrLongToString tostring)
Eric Smith8c663262007-08-25 02:26:07 +00001294{
Eric Smith8c663262007-08-25 02:26:07 +00001295 PyObject *result = NULL;
1296 PyObject *tmp = NULL;
1297 InternalFormatSpec format;
1298
Eric Smith8c663262007-08-25 02:26:07 +00001299 /* check for the special case of zero length format spec, make
Eric Smith4a7d76d2008-05-30 18:10:19 +00001300 it equivalent to str(obj) */
1301 if (format_spec_len == 0) {
1302 result = STRINGLIB_TOSTR(obj);
Eric Smith8c663262007-08-25 02:26:07 +00001303 goto done;
1304 }
1305
1306 /* parse the format_spec */
Eric Smith4a7d76d2008-05-30 18:10:19 +00001307 if (!parse_internal_render_format_spec(format_spec,
Eric Smithf8c8b6d2009-04-03 11:19:31 +00001308 format_spec_len,
1309 &format, 'd'))
Eric Smith8c663262007-08-25 02:26:07 +00001310 goto done;
1311
1312 /* type conversion? */
1313 switch (format.type) {
Eric Smith8c663262007-08-25 02:26:07 +00001314 case 'b':
1315 case 'c':
1316 case 'd':
1317 case 'o':
1318 case 'x':
1319 case 'X':
Eric Smith5807c412008-05-11 21:00:57 +00001320 case 'n':
Eric Smith8fd3eba2008-02-17 19:48:00 +00001321 /* no type conversion needed, already an int (or long). do
Eric Smithf8c8b6d2009-04-03 11:19:31 +00001322 the formatting */
1323 result = format_int_or_long_internal(obj, &format, tostring);
Eric Smith8c663262007-08-25 02:26:07 +00001324 break;
1325
Eric Smithfa767ef2008-01-28 10:59:27 +00001326 case 'e':
1327 case 'E':
1328 case 'f':
1329 case 'F':
1330 case 'g':
1331 case 'G':
Eric Smithfa767ef2008-01-28 10:59:27 +00001332 case '%':
1333 /* convert to float */
Eric Smith4a7d76d2008-05-30 18:10:19 +00001334 tmp = PyNumber_Float(obj);
Eric Smithfa767ef2008-01-28 10:59:27 +00001335 if (tmp == NULL)
1336 goto done;
Eric Smithf64bce82009-04-13 00:50:23 +00001337 result = format_float_internal(tmp, &format);
Eric Smithfa767ef2008-01-28 10:59:27 +00001338 break;
1339
Eric Smith8c663262007-08-25 02:26:07 +00001340 default:
1341 /* unknown */
Eric Smith5e5c0db2009-02-20 14:25:03 +00001342 unknown_presentation_type(format.type, obj->ob_type->tp_name);
Eric Smith8c663262007-08-25 02:26:07 +00001343 goto done;
1344 }
1345
1346done:
1347 Py_XDECREF(tmp);
1348 return result;
1349}
Eric Smith8fd3eba2008-02-17 19:48:00 +00001350#endif /* FORMAT_LONG || defined FORMAT_INT */
Eric Smith8c663262007-08-25 02:26:07 +00001351
Eric Smith8fd3eba2008-02-17 19:48:00 +00001352#ifdef FORMAT_LONG
1353/* Need to define long_format as a function that will convert a long
1354 to a string. In 3.0, _PyLong_Format has the correct signature. In
1355 2.x, we need to fudge a few parameters */
1356#if PY_VERSION_HEX >= 0x03000000
1357#define long_format _PyLong_Format
1358#else
1359static PyObject*
1360long_format(PyObject* value, int base)
1361{
1362 /* Convert to base, don't add trailing 'L', and use the new octal
1363 format. We already know this is a long object */
1364 assert(PyLong_Check(value));
1365 /* convert to base, don't add 'L', and use the new octal format */
1366 return _PyLong_Format(value, base, 0, 1);
1367}
1368#endif
1369
1370PyObject *
Eric Smith4a7d76d2008-05-30 18:10:19 +00001371FORMAT_LONG(PyObject *obj,
Eric Smithf8c8b6d2009-04-03 11:19:31 +00001372 STRINGLIB_CHAR *format_spec,
1373 Py_ssize_t format_spec_len)
Eric Smith8fd3eba2008-02-17 19:48:00 +00001374{
Eric Smith4a7d76d2008-05-30 18:10:19 +00001375 return format_int_or_long(obj, format_spec, format_spec_len,
Eric Smithf8c8b6d2009-04-03 11:19:31 +00001376 long_format);
Eric Smith8fd3eba2008-02-17 19:48:00 +00001377}
1378#endif /* FORMAT_LONG */
1379
1380#ifdef FORMAT_INT
1381/* this is only used for 2.x, not 3.0 */
1382static PyObject*
1383int_format(PyObject* value, int base)
1384{
1385 /* Convert to base, and use the new octal format. We already
1386 know this is an int object */
1387 assert(PyInt_Check(value));
1388 return _PyInt_Format((PyIntObject*)value, base, 1);
1389}
1390
1391PyObject *
Eric Smith4a7d76d2008-05-30 18:10:19 +00001392FORMAT_INT(PyObject *obj,
Eric Smithf8c8b6d2009-04-03 11:19:31 +00001393 STRINGLIB_CHAR *format_spec,
1394 Py_ssize_t format_spec_len)
Eric Smith8fd3eba2008-02-17 19:48:00 +00001395{
Eric Smith4a7d76d2008-05-30 18:10:19 +00001396 return format_int_or_long(obj, format_spec, format_spec_len,
Eric Smithf8c8b6d2009-04-03 11:19:31 +00001397 int_format);
Eric Smith8fd3eba2008-02-17 19:48:00 +00001398}
1399#endif /* FORMAT_INT */
1400
1401#ifdef FORMAT_FLOAT
Eric Smith8c663262007-08-25 02:26:07 +00001402PyObject *
Eric Smith4a7d76d2008-05-30 18:10:19 +00001403FORMAT_FLOAT(PyObject *obj,
Eric Smithf8c8b6d2009-04-03 11:19:31 +00001404 STRINGLIB_CHAR *format_spec,
1405 Py_ssize_t format_spec_len)
Eric Smith8c663262007-08-25 02:26:07 +00001406{
Eric Smith8c663262007-08-25 02:26:07 +00001407 PyObject *result = NULL;
Eric Smith8c663262007-08-25 02:26:07 +00001408 InternalFormatSpec format;
1409
Eric Smith8c663262007-08-25 02:26:07 +00001410 /* check for the special case of zero length format spec, make
Eric Smith4a7d76d2008-05-30 18:10:19 +00001411 it equivalent to str(obj) */
1412 if (format_spec_len == 0) {
1413 result = STRINGLIB_TOSTR(obj);
Eric Smith8c663262007-08-25 02:26:07 +00001414 goto done;
1415 }
1416
1417 /* parse the format_spec */
Eric Smith4a7d76d2008-05-30 18:10:19 +00001418 if (!parse_internal_render_format_spec(format_spec,
Eric Smithf8c8b6d2009-04-03 11:19:31 +00001419 format_spec_len,
1420 &format, '\0'))
Eric Smith8c663262007-08-25 02:26:07 +00001421 goto done;
1422
1423 /* type conversion? */
1424 switch (format.type) {
Eric Smith0923d1d2009-04-16 20:16:10 +00001425 case '\0': /* No format code: like 'g', but with at least one decimal. */
Eric Smith8c663262007-08-25 02:26:07 +00001426 case 'e':
1427 case 'E':
1428 case 'f':
1429 case 'F':
1430 case 'g':
1431 case 'G':
1432 case 'n':
1433 case '%':
1434 /* no conversion, already a float. do the formatting */
Eric Smith4a7d76d2008-05-30 18:10:19 +00001435 result = format_float_internal(obj, &format);
Eric Smith8c663262007-08-25 02:26:07 +00001436 break;
1437
1438 default:
1439 /* unknown */
Eric Smith5e5c0db2009-02-20 14:25:03 +00001440 unknown_presentation_type(format.type, obj->ob_type->tp_name);
Eric Smith8c663262007-08-25 02:26:07 +00001441 goto done;
1442 }
1443
1444done:
Eric Smith8c663262007-08-25 02:26:07 +00001445 return result;
1446}
Eric Smith8fd3eba2008-02-17 19:48:00 +00001447#endif /* FORMAT_FLOAT */
Eric Smith58a42242009-04-30 01:00:33 +00001448
1449#ifdef FORMAT_COMPLEX
1450PyObject *
1451FORMAT_COMPLEX(PyObject *obj,
1452 STRINGLIB_CHAR *format_spec,
1453 Py_ssize_t format_spec_len)
1454{
1455 PyObject *result = NULL;
1456 InternalFormatSpec format;
1457
1458 /* check for the special case of zero length format spec, make
1459 it equivalent to str(obj) */
1460 if (format_spec_len == 0) {
1461 result = STRINGLIB_TOSTR(obj);
1462 goto done;
1463 }
1464
1465 /* parse the format_spec */
1466 if (!parse_internal_render_format_spec(format_spec,
1467 format_spec_len,
1468 &format, '\0'))
1469 goto done;
1470
1471 /* type conversion? */
1472 switch (format.type) {
1473 case '\0': /* No format code: like 'g', but with at least one decimal. */
1474 case 'e':
1475 case 'E':
1476 case 'f':
1477 case 'F':
1478 case 'g':
1479 case 'G':
1480 case 'n':
1481 /* no conversion, already a complex. do the formatting */
1482 result = format_complex_internal(obj, &format);
1483 break;
1484
1485 default:
1486 /* unknown */
1487 unknown_presentation_type(format.type, obj->ob_type->tp_name);
1488 goto done;
1489 }
1490
1491done:
1492 return result;
1493}
1494#endif /* FORMAT_COMPLEX */