blob: 5cead660c4e2ecae18cca47f7366bf38d6f943ea [file] [log] [blame]
/* Implements the string, long, and float formatters, that is,
   string.__format__, etc. */
3
Eric Smithaca19e62009-04-22 13:29:05 +00004#include <locale.h>
5
/* Before including this file, you must include either:
   stringlib/unicodedefs.h
   stringlib/stringdefs.h

   Also, you should define the names:
   FORMAT_STRING
   FORMAT_LONG
   FORMAT_FLOAT
   FORMAT_COMPLEX
   to be whatever you want the public names of these functions to
   be.  These are the only non-static functions defined here.
*/
18
Eric Smithe9fb6862009-02-20 14:02:36 +000019/* Raises an exception about an unknown presentation type for this
20 * type. */
21
22static void
23unknown_presentation_type(STRINGLIB_CHAR presentation_type,
24 const char* type_name)
25{
26#if STRINGLIB_IS_UNICODE
27 /* If STRINGLIB_CHAR is Py_UNICODE, %c might be out-of-range,
28 hence the two cases. If it is char, gcc complains that the
29 condition below is always true, hence the ifdef. */
30 if (presentation_type > 32 && presentation_type < 128)
31#endif
32 PyErr_Format(PyExc_ValueError,
33 "Unknown format code '%c' "
34 "for object of type '%.200s'",
35 presentation_type,
36 type_name);
37#if STRINGLIB_IS_UNICODE
38 else
39 PyErr_Format(PyExc_ValueError,
40 "Unknown format code '\\x%x' "
41 "for object of type '%.200s'",
42 (unsigned int)presentation_type,
43 type_name);
44#endif
45}
46
Eric Smitha9f7d622008-02-17 19:46:49 +000047/*
48 get_integer consumes 0 or more decimal digit characters from an
49 input string, updates *result with the corresponding positive
50 integer, and returns the number of digits consumed.
51
52 returns -1 on error.
53*/
54static int
55get_integer(STRINGLIB_CHAR **ptr, STRINGLIB_CHAR *end,
56 Py_ssize_t *result)
57{
58 Py_ssize_t accumulator, digitval, oldaccumulator;
59 int numdigits;
60 accumulator = numdigits = 0;
61 for (;;(*ptr)++, numdigits++) {
62 if (*ptr >= end)
63 break;
64 digitval = STRINGLIB_TODECIMAL(**ptr);
65 if (digitval < 0)
66 break;
67 /*
68 This trick was copied from old Unicode format code. It's cute,
69 but would really suck on an old machine with a slow divide
70 implementation. Fortunately, in the normal case we do not
71 expect too many digits.
72 */
73 oldaccumulator = accumulator;
74 accumulator *= 10;
75 if ((accumulator+10)/10 != oldaccumulator+1) {
76 PyErr_Format(PyExc_ValueError,
77 "Too many decimal digits in format string");
78 return -1;
79 }
80 accumulator += digitval;
81 }
82 *result = accumulator;
83 return numdigits;
84}
85
86/************************************************************************/
87/*********** standard format specifier parsing **************************/
88/************************************************************************/
89
90/* returns true if this character is a specifier alignment token */
91Py_LOCAL_INLINE(int)
92is_alignment_token(STRINGLIB_CHAR c)
93{
94 switch (c) {
95 case '<': case '>': case '=': case '^':
96 return 1;
97 default:
98 return 0;
99 }
100}
101
102/* returns true if this character is a sign element */
103Py_LOCAL_INLINE(int)
104is_sign_element(STRINGLIB_CHAR c)
105{
106 switch (c) {
107 case ' ': case '+': case '-':
Eric Smitha9f7d622008-02-17 19:46:49 +0000108 return 1;
109 default:
110 return 0;
111 }
112}
113
114
115typedef struct {
116 STRINGLIB_CHAR fill_char;
117 STRINGLIB_CHAR align;
Eric Smithd0c84122008-07-15 10:10:07 +0000118 int alternate;
Eric Smitha9f7d622008-02-17 19:46:49 +0000119 STRINGLIB_CHAR sign;
120 Py_ssize_t width;
Eric Smithaca19e62009-04-22 13:29:05 +0000121 int thousands_separators;
Eric Smitha9f7d622008-02-17 19:46:49 +0000122 Py_ssize_t precision;
123 STRINGLIB_CHAR type;
124} InternalFormatSpec;
125
126/*
127 ptr points to the start of the format_spec, end points just past its end.
128 fills in format with the parsed information.
129 returns 1 on success, 0 on failure.
130 if failure, sets the exception
131*/
132static int
Eric Smithdc13b792008-05-30 18:10:04 +0000133parse_internal_render_format_spec(STRINGLIB_CHAR *format_spec,
Eric Smithaca19e62009-04-22 13:29:05 +0000134 Py_ssize_t format_spec_len,
Eric Smitha9f7d622008-02-17 19:46:49 +0000135 InternalFormatSpec *format,
136 char default_type)
137{
Eric Smithdc13b792008-05-30 18:10:04 +0000138 STRINGLIB_CHAR *ptr = format_spec;
139 STRINGLIB_CHAR *end = format_spec + format_spec_len;
Eric Smitha9f7d622008-02-17 19:46:49 +0000140
141 /* end-ptr is used throughout this code to specify the length of
142 the input string */
143
Eric Smithaca19e62009-04-22 13:29:05 +0000144 Py_ssize_t consumed;
Eric Smitha9f7d622008-02-17 19:46:49 +0000145
146 format->fill_char = '\0';
147 format->align = '\0';
Eric Smithd0c84122008-07-15 10:10:07 +0000148 format->alternate = 0;
Eric Smitha9f7d622008-02-17 19:46:49 +0000149 format->sign = '\0';
150 format->width = -1;
Eric Smithaca19e62009-04-22 13:29:05 +0000151 format->thousands_separators = 0;
Eric Smitha9f7d622008-02-17 19:46:49 +0000152 format->precision = -1;
153 format->type = default_type;
154
155 /* If the second char is an alignment token,
156 then parse the fill char */
157 if (end-ptr >= 2 && is_alignment_token(ptr[1])) {
158 format->align = ptr[1];
159 format->fill_char = ptr[0];
160 ptr += 2;
161 }
162 else if (end-ptr >= 1 && is_alignment_token(ptr[0])) {
163 format->align = ptr[0];
Eric Smith8a803dd2008-02-20 23:39:28 +0000164 ++ptr;
Eric Smitha9f7d622008-02-17 19:46:49 +0000165 }
166
167 /* Parse the various sign options */
168 if (end-ptr >= 1 && is_sign_element(ptr[0])) {
169 format->sign = ptr[0];
Eric Smith8a803dd2008-02-20 23:39:28 +0000170 ++ptr;
Eric Smitha9f7d622008-02-17 19:46:49 +0000171 }
172
Eric Smitha5fa5a22008-07-16 00:11:49 +0000173 /* If the next character is #, we're in alternate mode. This only
174 applies to integers. */
175 if (end-ptr >= 1 && ptr[0] == '#') {
Eric Smithaca19e62009-04-22 13:29:05 +0000176 format->alternate = 1;
177 ++ptr;
Eric Smitha5fa5a22008-07-16 00:11:49 +0000178 }
179
Eric Smitha9f7d622008-02-17 19:46:49 +0000180 /* The special case for 0-padding (backwards compat) */
181 if (format->fill_char == '\0' && end-ptr >= 1 && ptr[0] == '0') {
182 format->fill_char = '0';
183 if (format->align == '\0') {
184 format->align = '=';
185 }
Eric Smith8a803dd2008-02-20 23:39:28 +0000186 ++ptr;
Eric Smitha9f7d622008-02-17 19:46:49 +0000187 }
188
Eric Smithaca19e62009-04-22 13:29:05 +0000189 consumed = get_integer(&ptr, end, &format->width);
190 if (consumed == -1)
191 /* Overflow error. Exception already set. */
192 return 0;
Eric Smitha9f7d622008-02-17 19:46:49 +0000193
Eric Smithaca19e62009-04-22 13:29:05 +0000194 /* If consumed is 0, we didn't consume any characters for the
195 width. In that case, reset the width to -1, because
196 get_integer() will have set it to zero. -1 is how we record
197 that the width wasn't specified. */
198 if (consumed == 0)
Eric Smitha9f7d622008-02-17 19:46:49 +0000199 format->width = -1;
Eric Smithaca19e62009-04-22 13:29:05 +0000200
201 /* Comma signifies add thousands separators */
202 if (end-ptr && ptr[0] == ',') {
203 format->thousands_separators = 1;
204 ++ptr;
Eric Smitha9f7d622008-02-17 19:46:49 +0000205 }
206
207 /* Parse field precision */
208 if (end-ptr && ptr[0] == '.') {
Eric Smith8a803dd2008-02-20 23:39:28 +0000209 ++ptr;
Eric Smitha9f7d622008-02-17 19:46:49 +0000210
Eric Smithaca19e62009-04-22 13:29:05 +0000211 consumed = get_integer(&ptr, end, &format->precision);
212 if (consumed == -1)
213 /* Overflow error. Exception already set. */
214 return 0;
Eric Smitha9f7d622008-02-17 19:46:49 +0000215
Eric Smithaca19e62009-04-22 13:29:05 +0000216 /* Not having a precision after a dot is an error. */
217 if (consumed == 0) {
Eric Smitha9f7d622008-02-17 19:46:49 +0000218 PyErr_Format(PyExc_ValueError,
219 "Format specifier missing precision");
220 return 0;
221 }
222
223 }
224
Eric Smithaca19e62009-04-22 13:29:05 +0000225 /* Finally, parse the type field. */
Eric Smitha9f7d622008-02-17 19:46:49 +0000226
227 if (end-ptr > 1) {
Eric Smithaca19e62009-04-22 13:29:05 +0000228 /* More than one char remain, invalid conversion spec. */
Eric Smitha9f7d622008-02-17 19:46:49 +0000229 PyErr_Format(PyExc_ValueError, "Invalid conversion specification");
230 return 0;
231 }
232
233 if (end-ptr == 1) {
234 format->type = ptr[0];
Eric Smith8a803dd2008-02-20 23:39:28 +0000235 ++ptr;
Eric Smitha9f7d622008-02-17 19:46:49 +0000236 }
237
Eric Smithaca19e62009-04-22 13:29:05 +0000238 /* Do as much validating as we can, just by looking at the format
239 specifier. Do not take into account what type of formatting
240 we're doing (int, float, string). */
241
242 if (format->thousands_separators) {
243 switch (format->type) {
244 case 'd':
245 case 'e':
246 case 'f':
247 case 'g':
248 case 'E':
249 case 'G':
250 case '%':
251 case 'F':
Eric Smithebafbb72009-04-22 16:20:47 +0000252 case '\0':
Eric Smithaca19e62009-04-22 13:29:05 +0000253 /* These are allowed. See PEP 378.*/
254 break;
255 default:
256 PyErr_Format(PyExc_ValueError,
257 "Cannot specify ',' with '%c'.", format->type);
258 return 0;
259 }
260 }
261
Eric Smitha9f7d622008-02-17 19:46:49 +0000262 return 1;
263}
264
Eric Smith9139cc62009-04-30 00:58:58 +0000265/* Calculate the padding needed. */
266static void
267calc_padding(Py_ssize_t nchars, Py_ssize_t width, STRINGLIB_CHAR align,
268 Py_ssize_t *n_lpadding, Py_ssize_t *n_rpadding,
269 Py_ssize_t *n_total)
270{
271 if (width >= 0) {
272 if (nchars > width)
273 *n_total = nchars;
274 else
275 *n_total = width;
276 }
277 else {
278 /* not specified, use all of the chars and no more */
279 *n_total = nchars;
280 }
281
282 /* figure out how much leading space we need, based on the
283 aligning */
284 if (align == '>')
285 *n_lpadding = *n_total - nchars;
286 else if (align == '^')
287 *n_lpadding = (*n_total - nchars) / 2;
288 else
289 *n_lpadding = 0;
290
291 *n_rpadding = *n_total - nchars - *n_lpadding;
292}
293
294/* Do the padding, and return a pointer to where the caller-supplied
295 content goes. */
296static STRINGLIB_CHAR *
297fill_padding(STRINGLIB_CHAR *p, Py_ssize_t nchars, STRINGLIB_CHAR fill_char,
298 Py_ssize_t n_lpadding, Py_ssize_t n_rpadding)
299{
300 /* Pad on left. */
301 if (n_lpadding)
302 STRINGLIB_FILL(p, fill_char, n_lpadding);
303
304 /* Pad on right. */
305 if (n_rpadding)
306 STRINGLIB_FILL(p + nchars + n_lpadding, fill_char, n_rpadding);
307
308 /* Pointer to the user content. */
309 return p + n_lpadding;
310}
311
312#if defined FORMAT_FLOAT || defined FORMAT_LONG || defined FORMAT_COMPLEX
Eric Smitha9f7d622008-02-17 19:46:49 +0000313/************************************************************************/
314/*********** common routines for numeric formatting *********************/
315/************************************************************************/
316
/* Locale type codes, used to choose where the number formatting
   characters come from (see get_locale_info()). */
#define LT_CURRENT_LOCALE 0     /* ask the C library via localeconv() */
#define LT_DEFAULT_LOCALE 1     /* hard-coded: '.' decimal, ',' every 3 */
#define LT_NO_LOCALE 2          /* '.' decimal, no grouping at all */

/* Locale info needed for formatting integers and the part of floats
   before and including the decimal. Note that locales only support
   8-bit chars, not unicode. */
typedef struct {
    char *decimal_point;    /* string written in place of the '.' */
    char *thousands_sep;    /* string written between digit groups */
    char *grouping;         /* group sizes, as in struct lconv */
} LocaleInfo;
330
Eric Smitha9f7d622008-02-17 19:46:49 +0000331/* describes the layout for an integer, see the comment in
Eric Smitha5fa5a22008-07-16 00:11:49 +0000332 calc_number_widths() for details */
Eric Smitha9f7d622008-02-17 19:46:49 +0000333typedef struct {
334 Py_ssize_t n_lpadding;
Eric Smitha5fa5a22008-07-16 00:11:49 +0000335 Py_ssize_t n_prefix;
Eric Smitha9f7d622008-02-17 19:46:49 +0000336 Py_ssize_t n_spadding;
337 Py_ssize_t n_rpadding;
Eric Smithaca19e62009-04-22 13:29:05 +0000338 char sign;
339 Py_ssize_t n_sign; /* number of digits needed for sign (0/1) */
340 Py_ssize_t n_grouped_digits; /* Space taken up by the digits, including
341 any grouping chars. */
342 Py_ssize_t n_decimal; /* 0 if only an integer */
343 Py_ssize_t n_remainder; /* Digits in decimal and/or exponent part,
344 excluding the decimal itself, if
345 present. */
346
347 /* These 2 are not the widths of fields, but are needed by
348 STRINGLIB_GROUPING. */
349 Py_ssize_t n_digits; /* The number of digits before a decimal
350 or exponent. */
351 Py_ssize_t n_min_width; /* The min_width we used when we computed
352 the n_grouped_digits width. */
Eric Smitha9f7d622008-02-17 19:46:49 +0000353} NumberFieldWidths;
354
Eric Smith9139cc62009-04-30 00:58:58 +0000355
Eric Smithaca19e62009-04-22 13:29:05 +0000356/* Given a number of the form:
357 digits[remainder]
358 where ptr points to the start and end points to the end, find where
359 the integer part ends. This could be a decimal, an exponent, both,
360 or neither.
361 If a decimal point is present, set *has_decimal and increment
362 remainder beyond it.
363 Results are undefined (but shouldn't crash) for improperly
364 formatted strings.
365*/
366static void
367parse_number(STRINGLIB_CHAR *ptr, Py_ssize_t len,
368 Py_ssize_t *n_remainder, int *has_decimal)
369{
370 STRINGLIB_CHAR *end = ptr + len;
371 STRINGLIB_CHAR *remainder;
372
373 while (ptr<end && isdigit(*ptr))
374 ++ptr;
375 remainder = ptr;
376
377 /* Does remainder start with a decimal point? */
378 *has_decimal = ptr<end && *remainder == '.';
379
380 /* Skip the decimal point. */
381 if (*has_decimal)
382 remainder++;
383
384 *n_remainder = end - remainder;
385}
386
Eric Smitha9f7d622008-02-17 19:46:49 +0000387/* not all fields of format are used. for example, precision is
388 unused. should this take discrete params in order to be more clear
389 about what it does? or is passing a single format parameter easier
390 and more efficient enough to justify a little obfuscation? */
Eric Smithaca19e62009-04-22 13:29:05 +0000391static Py_ssize_t
392calc_number_widths(NumberFieldWidths *spec, Py_ssize_t n_prefix,
393 STRINGLIB_CHAR sign_char, STRINGLIB_CHAR *number,
394 Py_ssize_t n_number, Py_ssize_t n_remainder,
395 int has_decimal, const LocaleInfo *locale,
396 const InternalFormatSpec *format)
Eric Smitha9f7d622008-02-17 19:46:49 +0000397{
Eric Smithaca19e62009-04-22 13:29:05 +0000398 Py_ssize_t n_non_digit_non_padding;
399 Py_ssize_t n_padding;
400
401 spec->n_digits = n_number - n_remainder - (has_decimal?1:0);
Eric Smith4cb965c2008-07-16 18:29:51 +0000402 spec->n_lpadding = 0;
Eric Smithaca19e62009-04-22 13:29:05 +0000403 spec->n_prefix = n_prefix;
404 spec->n_decimal = has_decimal ? strlen(locale->decimal_point) : 0;
405 spec->n_remainder = n_remainder;
Eric Smith4cb965c2008-07-16 18:29:51 +0000406 spec->n_spadding = 0;
407 spec->n_rpadding = 0;
Eric Smithaca19e62009-04-22 13:29:05 +0000408 spec->sign = '\0';
409 spec->n_sign = 0;
Eric Smitha9f7d622008-02-17 19:46:49 +0000410
411 /* the output will look like:
Eric Smithaca19e62009-04-22 13:29:05 +0000412 | |
413 | <lpadding> <sign> <prefix> <spadding> <grouped_digits> <decimal> <remainder> <rpadding> |
414 | |
Eric Smitha9f7d622008-02-17 19:46:49 +0000415
Eric Smithaca19e62009-04-22 13:29:05 +0000416 sign is computed from format->sign and the actual
Eric Smitha9f7d622008-02-17 19:46:49 +0000417 sign of the number
418
Eric Smithd0c84122008-07-15 10:10:07 +0000419 prefix is given (it's for the '0x' prefix)
420
Eric Smitha9f7d622008-02-17 19:46:49 +0000421 digits is already known
422
423 the total width is either given, or computed from the
424 actual digits
425
426 only one of lpadding, spadding, and rpadding can be non-zero,
427 and it's calculated from the width and other fields
428 */
429
430 /* compute the various parts we're going to write */
Eric Smithaca19e62009-04-22 13:29:05 +0000431 switch (format->sign) {
432 case '+':
Eric Smitha9f7d622008-02-17 19:46:49 +0000433 /* always put a + or - */
Eric Smithaca19e62009-04-22 13:29:05 +0000434 spec->n_sign = 1;
435 spec->sign = (sign_char == '-' ? '-' : '+');
436 break;
437 case ' ':
438 spec->n_sign = 1;
439 spec->sign = (sign_char == '-' ? '-' : ' ');
440 break;
441 default:
442 /* Not specified, or the default (-) */
443 if (sign_char == '-') {
444 spec->n_sign = 1;
445 spec->sign = '-';
Eric Smitha9f7d622008-02-17 19:46:49 +0000446 }
447 }
448
Eric Smithaca19e62009-04-22 13:29:05 +0000449 /* The number of chars used for non-digits and non-padding. */
450 n_non_digit_non_padding = spec->n_sign + spec->n_prefix + spec->n_decimal +
451 spec->n_remainder;
Eric Smitha5fa5a22008-07-16 00:11:49 +0000452
Eric Smithaca19e62009-04-22 13:29:05 +0000453 /* min_width can go negative, that's okay. format->width == -1 means
454 we don't care. */
455 if (format->fill_char == '0')
456 spec->n_min_width = format->width - n_non_digit_non_padding;
457 else
458 spec->n_min_width = 0;
459
460 if (spec->n_digits == 0)
461 /* This case only occurs when using 'c' formatting, we need
462 to special case it because the grouping code always wants
463 to have at least one character. */
464 spec->n_grouped_digits = 0;
465 else
466 spec->n_grouped_digits = STRINGLIB_GROUPING(NULL, 0, NULL,
467 spec->n_digits,
468 spec->n_min_width,
469 locale->grouping,
470 locale->thousands_sep);
471
472 /* Given the desired width and the total of digit and non-digit
473 space we consume, see if we need any padding. format->width can
474 be negative (meaning no padding), but this code still works in
475 that case. */
476 n_padding = format->width -
477 (n_non_digit_non_padding + spec->n_grouped_digits);
478 if (n_padding > 0) {
479 /* Some padding is needed. Determine if it's left, space, or right. */
480 switch (format->align) {
481 case '<':
482 spec->n_rpadding = n_padding;
483 break;
484 case '^':
485 spec->n_lpadding = n_padding / 2;
486 spec->n_rpadding = n_padding - spec->n_lpadding;
487 break;
488 case '=':
489 spec->n_spadding = n_padding;
490 break;
491 default:
492 /* Handles '>', plus catch-all just in case. */
493 spec->n_lpadding = n_padding;
494 break;
Eric Smitha9f7d622008-02-17 19:46:49 +0000495 }
496 }
Eric Smithaca19e62009-04-22 13:29:05 +0000497 return spec->n_lpadding + spec->n_sign + spec->n_prefix +
498 spec->n_spadding + spec->n_grouped_digits + spec->n_decimal +
499 spec->n_remainder + spec->n_rpadding;
Eric Smitha9f7d622008-02-17 19:46:49 +0000500}
501
Eric Smithaca19e62009-04-22 13:29:05 +0000502/* Fill in the digit parts of a numbers's string representation,
503 as determined in calc_number_widths().
504 No error checking, since we know the buffer is the correct size. */
505static void
506fill_number(STRINGLIB_CHAR *buf, const NumberFieldWidths *spec,
507 STRINGLIB_CHAR *digits, Py_ssize_t n_digits,
508 STRINGLIB_CHAR *prefix, STRINGLIB_CHAR fill_char,
509 LocaleInfo *locale, int toupper)
Eric Smitha9f7d622008-02-17 19:46:49 +0000510{
Eric Smithaca19e62009-04-22 13:29:05 +0000511 /* Used to keep track of digits, decimal, and remainder. */
512 STRINGLIB_CHAR *p = digits;
513
514#ifndef NDEBUG
515 Py_ssize_t r;
516#endif
Eric Smitha9f7d622008-02-17 19:46:49 +0000517
518 if (spec->n_lpadding) {
Eric Smithaca19e62009-04-22 13:29:05 +0000519 STRINGLIB_FILL(buf, fill_char, spec->n_lpadding);
520 buf += spec->n_lpadding;
Eric Smitha9f7d622008-02-17 19:46:49 +0000521 }
Eric Smithaca19e62009-04-22 13:29:05 +0000522 if (spec->n_sign == 1) {
523 *buf++ = spec->sign;
Eric Smitha9f7d622008-02-17 19:46:49 +0000524 }
Eric Smitha5fa5a22008-07-16 00:11:49 +0000525 if (spec->n_prefix) {
Eric Smithaca19e62009-04-22 13:29:05 +0000526 memmove(buf,
527 prefix,
528 spec->n_prefix * sizeof(STRINGLIB_CHAR));
529 if (toupper) {
530 Py_ssize_t t;
531 for (t = 0; t < spec->n_prefix; ++t)
532 buf[t] = STRINGLIB_TOUPPER(buf[t]);
533 }
534 buf += spec->n_prefix;
Eric Smitha5fa5a22008-07-16 00:11:49 +0000535 }
Eric Smitha9f7d622008-02-17 19:46:49 +0000536 if (spec->n_spadding) {
Eric Smithaca19e62009-04-22 13:29:05 +0000537 STRINGLIB_FILL(buf, fill_char, spec->n_spadding);
538 buf += spec->n_spadding;
Eric Smitha9f7d622008-02-17 19:46:49 +0000539 }
Eric Smithaca19e62009-04-22 13:29:05 +0000540
541 /* Only for type 'c' special case, it has no digits. */
542 if (spec->n_digits != 0) {
543 /* Fill the digits with InsertThousandsGrouping. */
544#ifndef NDEBUG
545 r =
546#endif
547 STRINGLIB_GROUPING(buf, spec->n_grouped_digits, digits,
548 spec->n_digits, spec->n_min_width,
549 locale->grouping, locale->thousands_sep);
550#ifndef NDEBUG
551 assert(r == spec->n_grouped_digits);
552#endif
553 p += spec->n_digits;
Eric Smitha9f7d622008-02-17 19:46:49 +0000554 }
Eric Smithaca19e62009-04-22 13:29:05 +0000555 if (toupper) {
556 Py_ssize_t t;
557 for (t = 0; t < spec->n_grouped_digits; ++t)
558 buf[t] = STRINGLIB_TOUPPER(buf[t]);
559 }
560 buf += spec->n_grouped_digits;
561
562 if (spec->n_decimal) {
563 Py_ssize_t t;
564 for (t = 0; t < spec->n_decimal; ++t)
565 buf[t] = locale->decimal_point[t];
566 buf += spec->n_decimal;
567 p += 1;
568 }
569
570 if (spec->n_remainder) {
571 memcpy(buf, p, spec->n_remainder * sizeof(STRINGLIB_CHAR));
572 buf += spec->n_remainder;
573 p += spec->n_remainder;
574 }
575
Eric Smitha9f7d622008-02-17 19:46:49 +0000576 if (spec->n_rpadding) {
Eric Smithaca19e62009-04-22 13:29:05 +0000577 STRINGLIB_FILL(buf, fill_char, spec->n_rpadding);
578 buf += spec->n_rpadding;
Eric Smitha9f7d622008-02-17 19:46:49 +0000579 }
Eric Smitha9f7d622008-02-17 19:46:49 +0000580}
Eric Smithaca19e62009-04-22 13:29:05 +0000581
582static char no_grouping[1] = {CHAR_MAX};
583
584/* Find the decimal point character(s?), thousands_separator(s?), and
585 grouping description, either for the current locale if type is
586 LT_CURRENT_LOCALE, a hard-coded locale if LT_DEFAULT_LOCALE, or
587 none if LT_NO_LOCALE. */
588static void
589get_locale_info(int type, LocaleInfo *locale_info)
590{
591 switch (type) {
592 case LT_CURRENT_LOCALE: {
593 struct lconv *locale_data = localeconv();
594 locale_info->decimal_point = locale_data->decimal_point;
595 locale_info->thousands_sep = locale_data->thousands_sep;
596 locale_info->grouping = locale_data->grouping;
597 break;
598 }
599 case LT_DEFAULT_LOCALE:
600 locale_info->decimal_point = ".";
601 locale_info->thousands_sep = ",";
602 locale_info->grouping = "\3"; /* Group every 3 characters,
603 trailing 0 means repeat
604 infinitely. */
605 break;
606 case LT_NO_LOCALE:
607 locale_info->decimal_point = ".";
608 locale_info->thousands_sep = "";
609 locale_info->grouping = no_grouping;
610 break;
611 default:
612 assert(0);
613 }
614}
615
Eric Smith9139cc62009-04-30 00:58:58 +0000616#endif /* FORMAT_FLOAT || FORMAT_LONG || FORMAT_COMPLEX */
Eric Smitha9f7d622008-02-17 19:46:49 +0000617
618/************************************************************************/
619/*********** string formatting ******************************************/
620/************************************************************************/
621
622static PyObject *
623format_string_internal(PyObject *value, const InternalFormatSpec *format)
624{
Eric Smitha9f7d622008-02-17 19:46:49 +0000625 Py_ssize_t lpad;
Eric Smith9139cc62009-04-30 00:58:58 +0000626 Py_ssize_t rpad;
627 Py_ssize_t total;
628 STRINGLIB_CHAR *p;
Eric Smitha9f7d622008-02-17 19:46:49 +0000629 Py_ssize_t len = STRINGLIB_LEN(value);
630 PyObject *result = NULL;
631
632 /* sign is not allowed on strings */
633 if (format->sign != '\0') {
634 PyErr_SetString(PyExc_ValueError,
635 "Sign not allowed in string format specifier");
636 goto done;
637 }
638
Eric Smithd0c84122008-07-15 10:10:07 +0000639 /* alternate is not allowed on strings */
640 if (format->alternate) {
641 PyErr_SetString(PyExc_ValueError,
642 "Alternate form (#) not allowed in string format "
Eric Smithaca19e62009-04-22 13:29:05 +0000643 "specifier");
Eric Smithd0c84122008-07-15 10:10:07 +0000644 goto done;
645 }
646
Eric Smitha9f7d622008-02-17 19:46:49 +0000647 /* '=' alignment not allowed on strings */
648 if (format->align == '=') {
649 PyErr_SetString(PyExc_ValueError,
650 "'=' alignment not allowed "
651 "in string format specifier");
652 goto done;
653 }
654
655 /* if precision is specified, output no more that format.precision
656 characters */
657 if (format->precision >= 0 && len >= format->precision) {
658 len = format->precision;
659 }
660
Eric Smith9139cc62009-04-30 00:58:58 +0000661 calc_padding(len, format->width, format->align, &lpad, &rpad, &total);
Eric Smitha9f7d622008-02-17 19:46:49 +0000662
663 /* allocate the resulting string */
Eric Smith9139cc62009-04-30 00:58:58 +0000664 result = STRINGLIB_NEW(NULL, total);
Eric Smitha9f7d622008-02-17 19:46:49 +0000665 if (result == NULL)
666 goto done;
667
Eric Smith9139cc62009-04-30 00:58:58 +0000668 /* Write into that space. First the padding. */
669 p = fill_padding(STRINGLIB_STR(result), len,
670 format->fill_char=='\0'?' ':format->fill_char,
671 lpad, rpad);
Eric Smitha9f7d622008-02-17 19:46:49 +0000672
Eric Smith9139cc62009-04-30 00:58:58 +0000673 /* Then the source string. */
674 memcpy(p, STRINGLIB_STR(value), len * sizeof(STRINGLIB_CHAR));
Eric Smitha9f7d622008-02-17 19:46:49 +0000675
676done:
677 return result;
678}
679
680
681/************************************************************************/
682/*********** long formatting ********************************************/
683/************************************************************************/
684
#if defined FORMAT_LONG || defined FORMAT_INT
/* Callback that converts an int or long object to its string
   representation in the given base. */
typedef PyObject*
(*IntOrLongToString)(PyObject *value, int base);

/* Format the int or long object value according to format and return
   a new string object, or NULL with an exception set.

   value:    the number to format.
   format:   the parsed format specifier.  A precision is rejected with
             ValueError.  Type 'c' additionally rejects a sign and
             thousands separators.
   tostring: converts value to a string in base 2, 8, 10 or 16.  This
             code assumes its output starts with an optional '-',
             followed by a two-character prefix for bases 2, 8 and 16.

   For type 'c' the value must be a valid character code for this
   build, otherwise OverflowError is set. */
static PyObject *
format_int_or_long_internal(PyObject *value, const InternalFormatSpec *format,
                            IntOrLongToString tostring)
{
    PyObject *result = NULL;
    PyObject *tmp = NULL;
    STRINGLIB_CHAR *pnumeric_chars;
    STRINGLIB_CHAR numeric_char;
    STRINGLIB_CHAR sign_char = '\0';
    Py_ssize_t n_digits;       /* count of digits need from the computed
                                  string */
    Py_ssize_t n_remainder = 0; /* Used only for 'c' formatting, which
                                   produces non-digits */
    Py_ssize_t n_prefix = 0;   /* Count of prefix chars, (e.g., '0x') */
    Py_ssize_t n_total;
    STRINGLIB_CHAR *prefix = NULL;
    NumberFieldWidths spec;
    long x;

    /* Locale settings, either from the actual locale or
       from a hard-code pseudo-locale */
    LocaleInfo locale;

    /* no precision allowed on integers */
    if (format->precision != -1) {
        PyErr_SetString(PyExc_ValueError,
                        "Precision not allowed in integer format specifier");
        goto done;
    }

    /* special case for character formatting */
    if (format->type == 'c') {
        /* error to specify a sign */
        if (format->sign != '\0') {
            PyErr_SetString(PyExc_ValueError,
                            "Sign not allowed with integer"
                            " format specifier 'c'");
            goto done;
        }

        /* Error to specify a comma. */
        if (format->thousands_separators) {
            PyErr_SetString(PyExc_ValueError,
                            "Thousands separators not allowed with integer"
                            " format specifier 'c'");
            goto done;
        }

        /* taken from unicodeobject.c formatchar() */
        /* Integer input truncated to a character */
/* XXX: won't work for int */
        x = PyLong_AsLong(value);
        if (x == -1 && PyErr_Occurred())
            goto done;
#if STRINGLIB_IS_UNICODE
#ifdef Py_UNICODE_WIDE
        if (x < 0 || x > 0x10ffff) {
            PyErr_SetString(PyExc_OverflowError,
                            "%c arg not in range(0x110000) "
                            "(wide Python build)");
            goto done;
        }
#else
        if (x < 0 || x > 0xffff) {
            PyErr_SetString(PyExc_OverflowError,
                            "%c arg not in range(0x10000) "
                            "(narrow Python build)");
            goto done;
        }
#endif
#else
        /* Byte-string build: the result is a single char, so anything
           outside 0..0xff would be silently truncated by the cast
           below.  Reject it instead. */
        if (x < 0 || x > 0xff) {
            PyErr_SetString(PyExc_OverflowError,
                            "%c arg not in range(0x100)");
            goto done;
        }
#endif
        numeric_char = (STRINGLIB_CHAR)x;
        pnumeric_chars = &numeric_char;
        n_digits = 1;

        /* As a sort-of hack, we tell calc_number_widths that we only
           have "remainder" characters. calc_number_widths thinks
           these are characters that don't get formatted, only copied
           into the output string. We do this for 'c' formatting,
           because the characters are likely to be non-digits. */
        n_remainder = 1;
    }
    else {
        int base;
        int leading_chars_to_skip = 0;  /* Number of characters added by
                                           PyNumber_ToBase that we want to
                                           skip over. */

        /* Compute the base and how many characters will be added by
           PyNumber_ToBase */
        switch (format->type) {
        case 'b':
            base = 2;
            leading_chars_to_skip = 2; /* 0b */
            break;
        case 'o':
            base = 8;
            leading_chars_to_skip = 2; /* 0o */
            break;
        case 'x':
        case 'X':
            base = 16;
            leading_chars_to_skip = 2; /* 0x */
            break;
        default:  /* shouldn't be needed, but stops a compiler warning */
        case 'd':
        case 'n':
            base = 10;
            break;
        }

        /* The number of prefix chars is the same as the leading
           chars to skip */
        if (format->alternate)
            n_prefix = leading_chars_to_skip;

        /* Do the hard part, converting to a string in a given base */
        tmp = tostring(value, base);
        if (tmp == NULL)
            goto done;

        pnumeric_chars = STRINGLIB_STR(tmp);
        n_digits = STRINGLIB_LEN(tmp);

        prefix = pnumeric_chars;

        /* Remember not to modify what pnumeric_chars points to.  it
           might be interned.  Only modify it after we copy it into a
           newly allocated output buffer. */

        /* Is a sign character present in the output?  If so, remember it
           and skip it */
        if (pnumeric_chars[0] == '-') {
            sign_char = pnumeric_chars[0];
            ++prefix;
            ++leading_chars_to_skip;
        }

        /* Skip over the leading chars (0x, 0b, etc.) */
        n_digits -= leading_chars_to_skip;
        pnumeric_chars += leading_chars_to_skip;
    }

    /* Determine the grouping, separator, and decimal point, if any. */
    get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
                    (format->thousands_separators ?
                     LT_DEFAULT_LOCALE :
                     LT_NO_LOCALE),
                    &locale);

    /* Calculate how much memory we'll need. */
    n_total = calc_number_widths(&spec, n_prefix, sign_char, pnumeric_chars,
                       n_digits, n_remainder, 0, &locale, format);

    /* Allocate the memory. */
    result = STRINGLIB_NEW(NULL, n_total);
    if (!result)
        goto done;

    /* Populate the memory.  Type 'X' upper-cases the prefix and digits. */
    fill_number(STRINGLIB_STR(result), &spec, pnumeric_chars, n_digits,
                prefix, format->fill_char == '\0' ? ' ' : format->fill_char,
                &locale, format->type == 'X');

done:
    Py_XDECREF(tmp);
    return result;
}
#endif /* defined FORMAT_LONG || defined FORMAT_INT */
856
857/************************************************************************/
858/*********** float formatting *******************************************/
859/************************************************************************/
860
861#ifdef FORMAT_FLOAT
862#if STRINGLIB_IS_UNICODE
Eric Smithaca19e62009-04-22 13:29:05 +0000863static void
864strtounicode(Py_UNICODE *buffer, const char *charbuffer, Py_ssize_t len)
Eric Smitha9f7d622008-02-17 19:46:49 +0000865{
Eric Smithaca19e62009-04-22 13:29:05 +0000866 Py_ssize_t i;
867 for (i = 0; i < len; ++i)
868 buffer[i] = (Py_UNICODE)charbuffer[i];
Eric Smitha9f7d622008-02-17 19:46:49 +0000869}
870#endif
871
Eric Smitha9f7d622008-02-17 19:46:49 +0000872/* much of this is taken from unicodeobject.c */
Eric Smitha9f7d622008-02-17 19:46:49 +0000873static PyObject *
Eric Smith8a803dd2008-02-20 23:39:28 +0000874format_float_internal(PyObject *value,
Eric Smithaca19e62009-04-22 13:29:05 +0000875 const InternalFormatSpec *format)
Eric Smitha9f7d622008-02-17 19:46:49 +0000876{
Eric Smithaca19e62009-04-22 13:29:05 +0000877 char *buf = NULL; /* buffer returned from PyOS_double_to_string */
Eric Smitha9f7d622008-02-17 19:46:49 +0000878 Py_ssize_t n_digits;
Eric Smithaca19e62009-04-22 13:29:05 +0000879 Py_ssize_t n_remainder;
880 Py_ssize_t n_total;
881 int has_decimal;
882 double val;
Eric Smitha9f7d622008-02-17 19:46:49 +0000883 Py_ssize_t precision = format->precision;
Eric Smitha985a3a2009-05-05 18:26:08 +0000884 Py_ssize_t default_precision = 6;
Eric Smithaca19e62009-04-22 13:29:05 +0000885 STRINGLIB_CHAR type = format->type;
886 int add_pct = 0;
Eric Smitha9f7d622008-02-17 19:46:49 +0000887 STRINGLIB_CHAR *p;
888 NumberFieldWidths spec;
Eric Smithaca19e62009-04-22 13:29:05 +0000889 int flags = 0;
890 PyObject *result = NULL;
891 STRINGLIB_CHAR sign_char = '\0';
892 int float_type; /* Used to see if we have a nan, inf, or regular float. */
Eric Smitha9f7d622008-02-17 19:46:49 +0000893
894#if STRINGLIB_IS_UNICODE
Eric Smithaca19e62009-04-22 13:29:05 +0000895 Py_UNICODE *unicode_tmp = NULL;
Eric Smitha9f7d622008-02-17 19:46:49 +0000896#endif
897
Eric Smithaca19e62009-04-22 13:29:05 +0000898 /* Locale settings, either from the actual locale or
899 from a hard-code pseudo-locale */
900 LocaleInfo locale;
901
902 /* Alternate is not allowed on floats. */
Eric Smithd0c84122008-07-15 10:10:07 +0000903 if (format->alternate) {
904 PyErr_SetString(PyExc_ValueError,
905 "Alternate form (#) not allowed in float format "
Eric Smithaca19e62009-04-22 13:29:05 +0000906 "specifier");
Eric Smithd0c84122008-07-15 10:10:07 +0000907 goto done;
908 }
909
Eric Smithaca19e62009-04-22 13:29:05 +0000910 if (type == '\0') {
Eric Smitha985a3a2009-05-05 18:26:08 +0000911 /* Omitted type specifier. This is like 'g' but with at least one
912 digit after the decimal point, and different default precision.*/
Eric Smithaca19e62009-04-22 13:29:05 +0000913 type = 'g';
Eric Smitha985a3a2009-05-05 18:26:08 +0000914 default_precision = PyFloat_STR_PRECISION;
Eric Smithaca19e62009-04-22 13:29:05 +0000915 flags |= Py_DTSF_ADD_DOT_0;
916 }
917
918 if (type == 'n')
919 /* 'n' is the same as 'g', except for the locale used to
920 format the result. We take care of that later. */
921 type = 'g';
Eric Smitha9f7d622008-02-17 19:46:49 +0000922
Eric Smithe94a8262009-07-27 01:58:25 +0000923#if PY_VERSION_HEX < 0x0301000
Eric Smithd6c393a2008-07-17 19:49:47 +0000924 /* 'F' is the same as 'f', per the PEP */
Eric Smithe94a8262009-07-27 01:58:25 +0000925 /* This is no longer the case in 3.x */
Eric Smithd6c393a2008-07-17 19:49:47 +0000926 if (type == 'F')
927 type = 'f';
Eric Smithe94a8262009-07-27 01:58:25 +0000928#endif
Eric Smithd6c393a2008-07-17 19:49:47 +0000929
Eric Smithaca19e62009-04-22 13:29:05 +0000930 val = PyFloat_AsDouble(value);
931 if (val == -1.0 && PyErr_Occurred())
Eric Smitha9f7d622008-02-17 19:46:49 +0000932 goto done;
933
934 if (type == '%') {
935 type = 'f';
Eric Smithaca19e62009-04-22 13:29:05 +0000936 val *= 100;
937 add_pct = 1;
Eric Smitha9f7d622008-02-17 19:46:49 +0000938 }
939
940 if (precision < 0)
Eric Smitha985a3a2009-05-05 18:26:08 +0000941 precision = default_precision;
Eric Smith27f204d2009-05-02 09:58:09 +0000942
943#if PY_VERSION_HEX < 0x03010000
944 /* 3.1 no longer converts large 'f' to 'g'. */
Eric Smithaca19e62009-04-22 13:29:05 +0000945 if ((type == 'f' || type == 'F') && fabs(val) >= 1e50)
Eric Smithd6c393a2008-07-17 19:49:47 +0000946 type = 'g';
Eric Smith27f204d2009-05-02 09:58:09 +0000947#endif
Eric Smitha9f7d622008-02-17 19:46:49 +0000948
Eric Smithaca19e62009-04-22 13:29:05 +0000949 /* Cast "type", because if we're in unicode we need to pass a
950 8-bit char. This is safe, because we've restricted what "type"
951 can be. */
952 buf = PyOS_double_to_string(val, (char)type, precision, flags,
953 &float_type);
954 if (buf == NULL)
955 goto done;
956 n_digits = strlen(buf);
Eric Smitha9f7d622008-02-17 19:46:49 +0000957
Eric Smithaca19e62009-04-22 13:29:05 +0000958 if (add_pct) {
959 /* We know that buf has a trailing zero (since we just called
960 strlen() on it), and we don't use that fact any more. So we
961 can just write over the trailing zero. */
962 buf[n_digits] = '%';
963 n_digits += 1;
964 }
Eric Smitha9f7d622008-02-17 19:46:49 +0000965
Eric Smithaca19e62009-04-22 13:29:05 +0000966 /* Since there is no unicode version of PyOS_double_to_string,
967 just use the 8 bit version and then convert to unicode. */
Eric Smitha9f7d622008-02-17 19:46:49 +0000968#if STRINGLIB_IS_UNICODE
Eric Smithaca19e62009-04-22 13:29:05 +0000969 unicode_tmp = (Py_UNICODE*)PyMem_Malloc((n_digits)*sizeof(Py_UNICODE));
970 if (unicode_tmp == NULL) {
971 PyErr_NoMemory();
972 goto done;
973 }
974 strtounicode(unicode_tmp, buf, n_digits);
975 p = unicode_tmp;
Eric Smitha9f7d622008-02-17 19:46:49 +0000976#else
Eric Smithaca19e62009-04-22 13:29:05 +0000977 p = buf;
Eric Smitha9f7d622008-02-17 19:46:49 +0000978#endif
979
Eric Smithaca19e62009-04-22 13:29:05 +0000980 /* Is a sign character present in the output? If so, remember it
Eric Smitha9f7d622008-02-17 19:46:49 +0000981 and skip it */
Eric Smithaca19e62009-04-22 13:29:05 +0000982 if (*p == '-') {
983 sign_char = *p;
Eric Smith8a803dd2008-02-20 23:39:28 +0000984 ++p;
985 --n_digits;
Eric Smitha9f7d622008-02-17 19:46:49 +0000986 }
987
Eric Smithaca19e62009-04-22 13:29:05 +0000988 /* Determine if we have any "remainder" (after the digits, might include
989 decimal or exponent or both (or neither)) */
990 parse_number(p, n_digits, &n_remainder, &has_decimal);
Eric Smitha9f7d622008-02-17 19:46:49 +0000991
Eric Smithaca19e62009-04-22 13:29:05 +0000992 /* Determine the grouping, separator, and decimal point, if any. */
993 get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
994 (format->thousands_separators ?
995 LT_DEFAULT_LOCALE :
996 LT_NO_LOCALE),
997 &locale);
998
999 /* Calculate how much memory we'll need. */
1000 n_total = calc_number_widths(&spec, 0, sign_char, p, n_digits,
1001 n_remainder, has_decimal, &locale, format);
1002
1003 /* Allocate the memory. */
1004 result = STRINGLIB_NEW(NULL, n_total);
Eric Smitha9f7d622008-02-17 19:46:49 +00001005 if (result == NULL)
1006 goto done;
1007
Eric Smithaca19e62009-04-22 13:29:05 +00001008 /* Populate the memory. */
1009 fill_number(STRINGLIB_STR(result), &spec, p, n_digits, NULL,
1010 format->fill_char == '\0' ? ' ' : format->fill_char, &locale,
1011 0);
Eric Smitha9f7d622008-02-17 19:46:49 +00001012
1013done:
Eric Smithaca19e62009-04-22 13:29:05 +00001014 PyMem_Free(buf);
1015#if STRINGLIB_IS_UNICODE
1016 PyMem_Free(unicode_tmp);
1017#endif
Eric Smitha9f7d622008-02-17 19:46:49 +00001018 return result;
1019}
Eric Smitha9f7d622008-02-17 19:46:49 +00001020#endif /* FORMAT_FLOAT */
1021
1022/************************************************************************/
Eric Smith9139cc62009-04-30 00:58:58 +00001023/*********** complex formatting *****************************************/
1024/************************************************************************/
1025
1026#ifdef FORMAT_COMPLEX
1027
1028static PyObject *
1029format_complex_internal(PyObject *value,
1030 const InternalFormatSpec *format)
1031{
1032 double re;
1033 double im;
1034 char *re_buf = NULL; /* buffer returned from PyOS_double_to_string */
1035 char *im_buf = NULL; /* buffer returned from PyOS_double_to_string */
1036
1037 InternalFormatSpec tmp_format = *format;
1038 Py_ssize_t n_re_digits;
1039 Py_ssize_t n_im_digits;
1040 Py_ssize_t n_re_remainder;
1041 Py_ssize_t n_im_remainder;
1042 Py_ssize_t n_re_total;
1043 Py_ssize_t n_im_total;
1044 int re_has_decimal;
1045 int im_has_decimal;
1046 Py_ssize_t precision = format->precision;
Eric Smitha985a3a2009-05-05 18:26:08 +00001047 Py_ssize_t default_precision = 6;
Eric Smith9139cc62009-04-30 00:58:58 +00001048 STRINGLIB_CHAR type = format->type;
1049 STRINGLIB_CHAR *p_re;
1050 STRINGLIB_CHAR *p_im;
1051 NumberFieldWidths re_spec;
1052 NumberFieldWidths im_spec;
1053 int flags = 0;
1054 PyObject *result = NULL;
1055 STRINGLIB_CHAR *p;
1056 STRINGLIB_CHAR re_sign_char = '\0';
1057 STRINGLIB_CHAR im_sign_char = '\0';
1058 int re_float_type; /* Used to see if we have a nan, inf, or regular float. */
1059 int im_float_type;
1060 int add_parens = 0;
1061 int skip_re = 0;
1062 Py_ssize_t lpad;
1063 Py_ssize_t rpad;
1064 Py_ssize_t total;
1065
1066#if STRINGLIB_IS_UNICODE
1067 Py_UNICODE *re_unicode_tmp = NULL;
1068 Py_UNICODE *im_unicode_tmp = NULL;
1069#endif
1070
1071 /* Locale settings, either from the actual locale or
1072 from a hard-code pseudo-locale */
1073 LocaleInfo locale;
1074
1075 /* Alternate is not allowed on complex. */
1076 if (format->alternate) {
1077 PyErr_SetString(PyExc_ValueError,
1078 "Alternate form (#) not allowed in complex format "
1079 "specifier");
1080 goto done;
1081 }
1082
1083 /* Neither is zero pading. */
1084 if (format->fill_char == '0') {
1085 PyErr_SetString(PyExc_ValueError,
1086 "Zero padding is not allowed in complex format "
1087 "specifier");
1088 goto done;
1089 }
1090
1091 /* Neither is '=' alignment . */
1092 if (format->align == '=') {
1093 PyErr_SetString(PyExc_ValueError,
1094 "'=' alignment flag is not allowed in complex format "
1095 "specifier");
1096 goto done;
1097 }
1098
1099 re = PyComplex_RealAsDouble(value);
1100 if (re == -1.0 && PyErr_Occurred())
1101 goto done;
1102 im = PyComplex_ImagAsDouble(value);
1103 if (im == -1.0 && PyErr_Occurred())
1104 goto done;
1105
1106 if (type == '\0') {
1107 /* Omitted type specifier. Should be like str(self). */
1108 type = 'g';
Eric Smitha985a3a2009-05-05 18:26:08 +00001109 default_precision = PyFloat_STR_PRECISION;
Eric Smith9139cc62009-04-30 00:58:58 +00001110 add_parens = 1;
1111 if (re == 0.0)
1112 skip_re = 1;
1113 }
1114
1115 if (type == 'n')
1116 /* 'n' is the same as 'g', except for the locale used to
1117 format the result. We take care of that later. */
1118 type = 'g';
1119
Eric Smithe94a8262009-07-27 01:58:25 +00001120#if PY_VERSION_HEX < 0x03010000
1121 /* This is no longer the case in 3.x */
Eric Smith9139cc62009-04-30 00:58:58 +00001122 /* 'F' is the same as 'f', per the PEP */
1123 if (type == 'F')
1124 type = 'f';
Eric Smithe94a8262009-07-27 01:58:25 +00001125#endif
Eric Smith9139cc62009-04-30 00:58:58 +00001126
1127 if (precision < 0)
Eric Smitha985a3a2009-05-05 18:26:08 +00001128 precision = default_precision;
Eric Smith9139cc62009-04-30 00:58:58 +00001129
1130 /* Cast "type", because if we're in unicode we need to pass a
1131 8-bit char. This is safe, because we've restricted what "type"
1132 can be. */
1133 re_buf = PyOS_double_to_string(re, (char)type, precision, flags,
1134 &re_float_type);
1135 if (re_buf == NULL)
1136 goto done;
1137 im_buf = PyOS_double_to_string(im, (char)type, precision, flags,
1138 &im_float_type);
1139 if (im_buf == NULL)
1140 goto done;
1141
1142 n_re_digits = strlen(re_buf);
1143 n_im_digits = strlen(im_buf);
1144
1145 /* Since there is no unicode version of PyOS_double_to_string,
1146 just use the 8 bit version and then convert to unicode. */
1147#if STRINGLIB_IS_UNICODE
1148 re_unicode_tmp = (Py_UNICODE*)PyMem_Malloc((n_re_digits)*sizeof(Py_UNICODE));
1149 if (re_unicode_tmp == NULL) {
1150 PyErr_NoMemory();
1151 goto done;
1152 }
1153 strtounicode(re_unicode_tmp, re_buf, n_re_digits);
1154 p_re = re_unicode_tmp;
1155
1156 im_unicode_tmp = (Py_UNICODE*)PyMem_Malloc((n_im_digits)*sizeof(Py_UNICODE));
1157 if (im_unicode_tmp == NULL) {
1158 PyErr_NoMemory();
1159 goto done;
1160 }
1161 strtounicode(im_unicode_tmp, im_buf, n_im_digits);
1162 p_im = im_unicode_tmp;
1163#else
1164 p_re = re_buf;
1165 p_im = im_buf;
1166#endif
1167
1168 /* Is a sign character present in the output? If so, remember it
1169 and skip it */
1170 if (*p_re == '-') {
1171 re_sign_char = *p_re;
1172 ++p_re;
1173 --n_re_digits;
1174 }
1175 if (*p_im == '-') {
1176 im_sign_char = *p_im;
1177 ++p_im;
1178 --n_im_digits;
1179 }
1180
1181 /* Determine if we have any "remainder" (after the digits, might include
1182 decimal or exponent or both (or neither)) */
1183 parse_number(p_re, n_re_digits, &n_re_remainder, &re_has_decimal);
1184 parse_number(p_im, n_im_digits, &n_im_remainder, &im_has_decimal);
1185
1186 /* Determine the grouping, separator, and decimal point, if any. */
1187 get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
1188 (format->thousands_separators ?
1189 LT_DEFAULT_LOCALE :
1190 LT_NO_LOCALE),
1191 &locale);
1192
1193 /* Turn off any padding. We'll do it later after we've composed
1194 the numbers without padding. */
1195 tmp_format.fill_char = '\0';
1196 tmp_format.align = '\0';
1197 tmp_format.width = -1;
1198
1199 /* Calculate how much memory we'll need. */
1200 n_re_total = calc_number_widths(&re_spec, 0, re_sign_char, p_re,
1201 n_re_digits, n_re_remainder,
1202 re_has_decimal, &locale, &tmp_format);
1203
1204 /* Same formatting, but always include a sign. */
1205 tmp_format.sign = '+';
1206 n_im_total = calc_number_widths(&im_spec, 0, im_sign_char, p_im,
1207 n_im_digits, n_im_remainder,
1208 im_has_decimal, &locale, &tmp_format);
1209
1210 if (skip_re)
1211 n_re_total = 0;
1212
1213 /* Add 1 for the 'j', and optionally 2 for parens. */
1214 calc_padding(n_re_total + n_im_total + 1 + add_parens * 2,
1215 format->width, format->align, &lpad, &rpad, &total);
1216
1217 result = STRINGLIB_NEW(NULL, total);
1218 if (result == NULL)
1219 goto done;
1220
1221 /* Populate the memory. First, the padding. */
1222 p = fill_padding(STRINGLIB_STR(result),
1223 n_re_total + n_im_total + 1 + add_parens * 2,
1224 format->fill_char=='\0' ? ' ' : format->fill_char,
1225 lpad, rpad);
1226
1227 if (add_parens)
1228 *p++ = '(';
1229
1230 if (!skip_re) {
1231 fill_number(p, &re_spec, p_re, n_re_digits, NULL, 0, &locale, 0);
1232 p += n_re_total;
1233 }
1234 fill_number(p, &im_spec, p_im, n_im_digits, NULL, 0, &locale, 0);
1235 p += n_im_total;
1236 *p++ = 'j';
1237
1238 if (add_parens)
1239 *p++ = ')';
1240
1241done:
1242 PyMem_Free(re_buf);
1243 PyMem_Free(im_buf);
1244#if STRINGLIB_IS_UNICODE
1245 PyMem_Free(re_unicode_tmp);
1246 PyMem_Free(im_unicode_tmp);
1247#endif
1248 return result;
1249}
1250#endif /* FORMAT_COMPLEX */
1251
1252/************************************************************************/
Eric Smitha9f7d622008-02-17 19:46:49 +00001253/*********** built in formatters ****************************************/
1254/************************************************************************/
Eric Smitha9f7d622008-02-17 19:46:49 +00001255PyObject *
Eric Smithdc13b792008-05-30 18:10:04 +00001256FORMAT_STRING(PyObject *obj,
Eric Smithaca19e62009-04-22 13:29:05 +00001257 STRINGLIB_CHAR *format_spec,
1258 Py_ssize_t format_spec_len)
Eric Smitha9f7d622008-02-17 19:46:49 +00001259{
Eric Smitha9f7d622008-02-17 19:46:49 +00001260 InternalFormatSpec format;
Eric Smithdc13b792008-05-30 18:10:04 +00001261 PyObject *result = NULL;
Eric Smitha9f7d622008-02-17 19:46:49 +00001262
1263 /* check for the special case of zero length format spec, make
Eric Smithdc13b792008-05-30 18:10:04 +00001264 it equivalent to str(obj) */
1265 if (format_spec_len == 0) {
1266 result = STRINGLIB_TOSTR(obj);
Eric Smitha9f7d622008-02-17 19:46:49 +00001267 goto done;
1268 }
1269
Eric Smitha9f7d622008-02-17 19:46:49 +00001270 /* parse the format_spec */
Eric Smithdc13b792008-05-30 18:10:04 +00001271 if (!parse_internal_render_format_spec(format_spec, format_spec_len,
Eric Smithaca19e62009-04-22 13:29:05 +00001272 &format, 's'))
Eric Smitha9f7d622008-02-17 19:46:49 +00001273 goto done;
1274
1275 /* type conversion? */
1276 switch (format.type) {
1277 case 's':
1278 /* no type conversion needed, already a string. do the formatting */
Eric Smithdc13b792008-05-30 18:10:04 +00001279 result = format_string_internal(obj, &format);
Eric Smitha9f7d622008-02-17 19:46:49 +00001280 break;
1281 default:
1282 /* unknown */
Eric Smithe9fb6862009-02-20 14:02:36 +00001283 unknown_presentation_type(format.type, obj->ob_type->tp_name);
Eric Smitha9f7d622008-02-17 19:46:49 +00001284 goto done;
1285 }
1286
1287done:
Eric Smitha9f7d622008-02-17 19:46:49 +00001288 return result;
1289}
Eric Smitha9f7d622008-02-17 19:46:49 +00001290
1291#if defined FORMAT_LONG || defined FORMAT_INT
1292static PyObject*
Eric Smithdc13b792008-05-30 18:10:04 +00001293format_int_or_long(PyObject* obj,
Eric Smithaca19e62009-04-22 13:29:05 +00001294 STRINGLIB_CHAR *format_spec,
1295 Py_ssize_t format_spec_len,
1296 IntOrLongToString tostring)
Eric Smitha9f7d622008-02-17 19:46:49 +00001297{
Eric Smitha9f7d622008-02-17 19:46:49 +00001298 PyObject *result = NULL;
1299 PyObject *tmp = NULL;
1300 InternalFormatSpec format;
1301
Eric Smitha9f7d622008-02-17 19:46:49 +00001302 /* check for the special case of zero length format spec, make
Eric Smithdc13b792008-05-30 18:10:04 +00001303 it equivalent to str(obj) */
1304 if (format_spec_len == 0) {
1305 result = STRINGLIB_TOSTR(obj);
Eric Smitha9f7d622008-02-17 19:46:49 +00001306 goto done;
1307 }
1308
1309 /* parse the format_spec */
Eric Smithdc13b792008-05-30 18:10:04 +00001310 if (!parse_internal_render_format_spec(format_spec,
Eric Smithaca19e62009-04-22 13:29:05 +00001311 format_spec_len,
1312 &format, 'd'))
Eric Smitha9f7d622008-02-17 19:46:49 +00001313 goto done;
1314
1315 /* type conversion? */
1316 switch (format.type) {
1317 case 'b':
1318 case 'c':
1319 case 'd':
1320 case 'o':
1321 case 'x':
1322 case 'X':
Eric Smithcf537ff2008-05-11 19:52:48 +00001323 case 'n':
Eric Smitha9f7d622008-02-17 19:46:49 +00001324 /* no type conversion needed, already an int (or long). do
Eric Smithaca19e62009-04-22 13:29:05 +00001325 the formatting */
1326 result = format_int_or_long_internal(obj, &format, tostring);
Eric Smitha9f7d622008-02-17 19:46:49 +00001327 break;
1328
1329 case 'e':
1330 case 'E':
1331 case 'f':
1332 case 'F':
1333 case 'g':
1334 case 'G':
Eric Smitha9f7d622008-02-17 19:46:49 +00001335 case '%':
1336 /* convert to float */
Eric Smithdc13b792008-05-30 18:10:04 +00001337 tmp = PyNumber_Float(obj);
Eric Smitha9f7d622008-02-17 19:46:49 +00001338 if (tmp == NULL)
1339 goto done;
Eric Smith8c023242009-04-13 00:29:50 +00001340 result = format_float_internal(tmp, &format);
Eric Smitha9f7d622008-02-17 19:46:49 +00001341 break;
1342
1343 default:
1344 /* unknown */
Eric Smithe9fb6862009-02-20 14:02:36 +00001345 unknown_presentation_type(format.type, obj->ob_type->tp_name);
Eric Smitha9f7d622008-02-17 19:46:49 +00001346 goto done;
1347 }
1348
1349done:
1350 Py_XDECREF(tmp);
1351 return result;
1352}
1353#endif /* FORMAT_LONG || defined FORMAT_INT */
1354
1355#ifdef FORMAT_LONG
1356/* Need to define long_format as a function that will convert a long
1357 to a string. In 3.0, _PyLong_Format has the correct signature. In
1358 2.x, we need to fudge a few parameters */
1359#if PY_VERSION_HEX >= 0x03000000
1360#define long_format _PyLong_Format
1361#else
1362static PyObject*
1363long_format(PyObject* value, int base)
1364{
1365 /* Convert to base, don't add trailing 'L', and use the new octal
1366 format. We already know this is a long object */
1367 assert(PyLong_Check(value));
1368 /* convert to base, don't add 'L', and use the new octal format */
1369 return _PyLong_Format(value, base, 0, 1);
1370}
1371#endif
1372
1373PyObject *
Eric Smithdc13b792008-05-30 18:10:04 +00001374FORMAT_LONG(PyObject *obj,
Eric Smithaca19e62009-04-22 13:29:05 +00001375 STRINGLIB_CHAR *format_spec,
1376 Py_ssize_t format_spec_len)
Eric Smitha9f7d622008-02-17 19:46:49 +00001377{
Eric Smithdc13b792008-05-30 18:10:04 +00001378 return format_int_or_long(obj, format_spec, format_spec_len,
Eric Smithaca19e62009-04-22 13:29:05 +00001379 long_format);
Eric Smitha9f7d622008-02-17 19:46:49 +00001380}
1381#endif /* FORMAT_LONG */
1382
1383#ifdef FORMAT_INT
1384/* this is only used for 2.x, not 3.0 */
1385static PyObject*
1386int_format(PyObject* value, int base)
1387{
1388 /* Convert to base, and use the new octal format. We already
1389 know this is an int object */
1390 assert(PyInt_Check(value));
1391 return _PyInt_Format((PyIntObject*)value, base, 1);
1392}
1393
1394PyObject *
Eric Smithdc13b792008-05-30 18:10:04 +00001395FORMAT_INT(PyObject *obj,
Eric Smithaca19e62009-04-22 13:29:05 +00001396 STRINGLIB_CHAR *format_spec,
1397 Py_ssize_t format_spec_len)
Eric Smitha9f7d622008-02-17 19:46:49 +00001398{
Eric Smithdc13b792008-05-30 18:10:04 +00001399 return format_int_or_long(obj, format_spec, format_spec_len,
Eric Smithaca19e62009-04-22 13:29:05 +00001400 int_format);
Eric Smitha9f7d622008-02-17 19:46:49 +00001401}
1402#endif /* FORMAT_INT */
1403
1404#ifdef FORMAT_FLOAT
1405PyObject *
Eric Smithdc13b792008-05-30 18:10:04 +00001406FORMAT_FLOAT(PyObject *obj,
Eric Smithaca19e62009-04-22 13:29:05 +00001407 STRINGLIB_CHAR *format_spec,
1408 Py_ssize_t format_spec_len)
Eric Smitha9f7d622008-02-17 19:46:49 +00001409{
Eric Smitha9f7d622008-02-17 19:46:49 +00001410 PyObject *result = NULL;
1411 InternalFormatSpec format;
1412
Eric Smitha9f7d622008-02-17 19:46:49 +00001413 /* check for the special case of zero length format spec, make
Eric Smithdc13b792008-05-30 18:10:04 +00001414 it equivalent to str(obj) */
1415 if (format_spec_len == 0) {
1416 result = STRINGLIB_TOSTR(obj);
Eric Smitha9f7d622008-02-17 19:46:49 +00001417 goto done;
1418 }
1419
1420 /* parse the format_spec */
Eric Smithdc13b792008-05-30 18:10:04 +00001421 if (!parse_internal_render_format_spec(format_spec,
Eric Smithaca19e62009-04-22 13:29:05 +00001422 format_spec_len,
1423 &format, '\0'))
Eric Smitha9f7d622008-02-17 19:46:49 +00001424 goto done;
1425
1426 /* type conversion? */
1427 switch (format.type) {
Eric Smithaca19e62009-04-22 13:29:05 +00001428 case '\0': /* No format code: like 'g', but with at least one decimal. */
Eric Smitha9f7d622008-02-17 19:46:49 +00001429 case 'e':
1430 case 'E':
1431 case 'f':
1432 case 'F':
1433 case 'g':
1434 case 'G':
1435 case 'n':
1436 case '%':
1437 /* no conversion, already a float. do the formatting */
Eric Smithdc13b792008-05-30 18:10:04 +00001438 result = format_float_internal(obj, &format);
Eric Smitha9f7d622008-02-17 19:46:49 +00001439 break;
1440
1441 default:
1442 /* unknown */
Eric Smithe9fb6862009-02-20 14:02:36 +00001443 unknown_presentation_type(format.type, obj->ob_type->tp_name);
Eric Smitha9f7d622008-02-17 19:46:49 +00001444 goto done;
1445 }
1446
1447done:
1448 return result;
1449}
1450#endif /* FORMAT_FLOAT */
Eric Smith9139cc62009-04-30 00:58:58 +00001451
1452#ifdef FORMAT_COMPLEX
1453PyObject *
1454FORMAT_COMPLEX(PyObject *obj,
1455 STRINGLIB_CHAR *format_spec,
1456 Py_ssize_t format_spec_len)
1457{
1458 PyObject *result = NULL;
1459 InternalFormatSpec format;
1460
1461 /* check for the special case of zero length format spec, make
1462 it equivalent to str(obj) */
1463 if (format_spec_len == 0) {
1464 result = STRINGLIB_TOSTR(obj);
1465 goto done;
1466 }
1467
1468 /* parse the format_spec */
1469 if (!parse_internal_render_format_spec(format_spec,
1470 format_spec_len,
1471 &format, '\0'))
1472 goto done;
1473
1474 /* type conversion? */
1475 switch (format.type) {
1476 case '\0': /* No format code: like 'g', but with at least one decimal. */
1477 case 'e':
1478 case 'E':
1479 case 'f':
1480 case 'F':
1481 case 'g':
1482 case 'G':
1483 case 'n':
1484 /* no conversion, already a complex. do the formatting */
1485 result = format_complex_internal(obj, &format);
1486 break;
1487
1488 default:
1489 /* unknown */
1490 unknown_presentation_type(format.type, obj->ob_type->tp_name);
1491 goto done;
1492 }
1493
1494done:
1495 return result;
1496}
1497#endif /* FORMAT_COMPLEX */