blob: 79f63e2603538d2f72964613eb28a4d15cf7fcf0 [file] [log] [blame]
Martin v. Löwis737ea822004-06-08 18:52:54 +00001/* -*- Mode: C; c-file-style: "python" -*- */
2
3#include <Python.h>
4#include <locale.h>
5
/**
 * PyOS_ascii_strtod:
 * @nptr:    the string to convert to a numeric value.
 * @endptr:  if non-%NULL, it returns the character after
 *           the last character used in the conversion.
 *
 * Converts a string to a double value.
 * This function behaves like the standard strtod() function
 * does in the C locale. It does this without actually
 * changing the current locale, since that would not be
 * thread-safe.
 *
 * This function is typically used when reading configuration
 * files or other non-user input that should be locale independent.
 * To handle input from the user you should normally use the
 * locale-sensitive system strtod() function.
 *
 * If the correct value would cause overflow, plus or minus %HUGE_VAL
 * is returned (according to the sign of the value), and %ERANGE is
 * stored in %errno. If the correct value would cause underflow,
 * zero is returned and %ERANGE is stored in %errno.
 * If memory allocation fails, %ENOMEM is stored in %errno.
 *
 * This function resets %errno before calling strtod() so that
 * you can reliably detect overflow and underflow.
 *
 * Return value: the double value.
 **/
Eric Smithaca19e62009-04-22 13:29:05 +000034
35/*
36 Use system strtod; since strtod is locale aware, we may
37 have to first fix the decimal separator.
38
39 Note that unlike _Py_dg_strtod, the system strtod may not always give
40 correctly rounded results.
41*/
42
/* Prefix test used for nan and inf detection: does s start with t when the
   case of the characters in s is ignored?  t itself is compared verbatim,
   so it should already be lower-case.  Returns 1 when all of t was matched,
   0 otherwise. */

static int
case_insensitive_match(const char *s, const char *t)
{
    for (; *t != '\0'; s++, t++) {
        if (Py_TOLOWER(*s) != *t)
            return 0;
    }
    /* Ran off the end of t without a mismatch. */
    return 1;
}
55
Martin v. Löwis737ea822004-06-08 18:52:54 +000056double
Neal Norwitze7214a12005-12-18 05:03:17 +000057PyOS_ascii_strtod(const char *nptr, char **endptr)
Martin v. Löwis737ea822004-06-08 18:52:54 +000058{
59 char *fail_pos;
Neal Norwitz0e7a0ed2005-12-18 05:37:36 +000060 double val = -1.0;
Martin v. Löwis737ea822004-06-08 18:52:54 +000061 struct lconv *locale_data;
62 const char *decimal_point;
Neal Norwitzd39d8612006-01-08 01:03:36 +000063 size_t decimal_point_len;
Martin v. Löwis737ea822004-06-08 18:52:54 +000064 const char *p, *decimal_point_pos;
65 const char *end = NULL; /* Silence gcc */
Guido van Rossum3b835492008-01-05 00:59:59 +000066 const char *digits_pos = NULL;
67 int negate = 0;
Martin v. Löwis737ea822004-06-08 18:52:54 +000068
Martin v. Löwis737ea822004-06-08 18:52:54 +000069 assert(nptr != NULL);
70
71 fail_pos = NULL;
72
73 locale_data = localeconv();
74 decimal_point = locale_data->decimal_point;
75 decimal_point_len = strlen(decimal_point);
76
77 assert(decimal_point_len != 0);
78
79 decimal_point_pos = NULL;
Guido van Rossum3b835492008-01-05 00:59:59 +000080
Mark Dickinson6d6b2202009-04-26 16:04:05 +000081 /* Set errno to zero, so that we can distinguish zero results
82 and underflows */
83 errno = 0;
84
Guido van Rossum3b835492008-01-05 00:59:59 +000085 /* We process any leading whitespace and the optional sign manually,
86 then pass the remainder to the system strtod. This ensures that
87 the result of an underflow has the correct sign. (bug #1725) */
88
89 p = nptr;
90 /* Skip leading space */
Eric Smithcac7af62009-04-27 19:04:37 +000091 while (Py_ISSPACE(*p))
Guido van Rossum3b835492008-01-05 00:59:59 +000092 p++;
93
94 /* Process leading sign, if present */
95 if (*p == '-') {
96 negate = 1;
97 p++;
Mark Dickinson6d6b2202009-04-26 16:04:05 +000098 }
99 else if (*p == '+') {
Guido van Rossum3b835492008-01-05 00:59:59 +0000100 p++;
101 }
102
Mark Dickinson6d6b2202009-04-26 16:04:05 +0000103 /* Parse infinities and nans */
104 if (*p == 'i' || *p == 'I') {
Mark Dickinson01fce5a2009-05-03 22:33:34 +0000105 if (case_insensitive_match(p+1, "nf")) {
Mark Dickinson6d6b2202009-04-26 16:04:05 +0000106 val = Py_HUGE_VAL;
Mark Dickinson01fce5a2009-05-03 22:33:34 +0000107 if (case_insensitive_match(p+3, "inity"))
Mark Dickinson6d6b2202009-04-26 16:04:05 +0000108 fail_pos = (char *)p+8;
109 else
110 fail_pos = (char *)p+3;
111 goto got_val;
112 }
113 else
114 goto invalid_string;
Guido van Rossum3b835492008-01-05 00:59:59 +0000115 }
Mark Dickinson6d6b2202009-04-26 16:04:05 +0000116#ifdef Py_NAN
117 if (*p == 'n' || *p == 'N') {
Mark Dickinson01fce5a2009-05-03 22:33:34 +0000118 if (case_insensitive_match(p+1, "an")) {
Mark Dickinson6d6b2202009-04-26 16:04:05 +0000119 val = Py_NAN;
120 fail_pos = (char *)p+3;
121 goto got_val;
122 }
123 else
124 goto invalid_string;
125 }
126#endif
Guido van Rossum3b835492008-01-05 00:59:59 +0000127
Mark Dickinson6d6b2202009-04-26 16:04:05 +0000128 /* Some platform strtods accept hex floats; Python shouldn't (at the
129 moment), so we check explicitly for strings starting with '0x'. */
130 if (*p == '0' && (*(p+1) == 'x' || *(p+1) == 'X'))
131 goto invalid_string;
132
133 /* Check that what's left begins with a digit or decimal point */
Eric Smithcac7af62009-04-27 19:04:37 +0000134 if (!Py_ISDIGIT(*p) && *p != '.')
Mark Dickinson6d6b2202009-04-26 16:04:05 +0000135 goto invalid_string;
136
137 digits_pos = p;
138 if (decimal_point[0] != '.' ||
Martin v. Löwis737ea822004-06-08 18:52:54 +0000139 decimal_point[1] != 0)
140 {
Mark Dickinson6d6b2202009-04-26 16:04:05 +0000141 /* Look for a '.' in the input; if present, it'll need to be
142 swapped for the current locale's decimal point before we
143 call strtod. On the other hand, if we find the current
144 locale's decimal point then the input is invalid. */
Eric Smithcac7af62009-04-27 19:04:37 +0000145 while (Py_ISDIGIT(*p))
Neal Norwitze7214a12005-12-18 05:03:17 +0000146 p++;
147
148 if (*p == '.')
Martin v. Löwis737ea822004-06-08 18:52:54 +0000149 {
Neal Norwitze7214a12005-12-18 05:03:17 +0000150 decimal_point_pos = p++;
Martin v. Löwis737ea822004-06-08 18:52:54 +0000151
Mark Dickinson6d6b2202009-04-26 16:04:05 +0000152 /* locate end of number */
Eric Smithcac7af62009-04-27 19:04:37 +0000153 while (Py_ISDIGIT(*p))
Martin v. Löwis737ea822004-06-08 18:52:54 +0000154 p++;
155
Neal Norwitze7214a12005-12-18 05:03:17 +0000156 if (*p == 'e' || *p == 'E')
157 p++;
158 if (*p == '+' || *p == '-')
159 p++;
Eric Smithcac7af62009-04-27 19:04:37 +0000160 while (Py_ISDIGIT(*p))
Neal Norwitze7214a12005-12-18 05:03:17 +0000161 p++;
162 end = p;
Martin v. Löwis737ea822004-06-08 18:52:54 +0000163 }
Martin v. Löwisfcfff0a2006-07-03 12:19:50 +0000164 else if (strncmp(p, decimal_point, decimal_point_len) == 0)
Martin v. Löwisfcfff0a2006-07-03 12:19:50 +0000165 /* Python bug #1417699 */
Mark Dickinson6d6b2202009-04-26 16:04:05 +0000166 goto invalid_string;
Eric Smith5c35a9d2008-03-17 12:14:29 +0000167 /* For the other cases, we need not convert the decimal
168 point */
Martin v. Löwis737ea822004-06-08 18:52:54 +0000169 }
170
Mark Dickinson6d6b2202009-04-26 16:04:05 +0000171 if (decimal_point_pos) {
Martin v. Löwis737ea822004-06-08 18:52:54 +0000172 char *copy, *c;
Mark Dickinson6d6b2202009-04-26 16:04:05 +0000173 /* Create a copy of the input, with the '.' converted to the
174 locale-specific decimal point */
Guido van Rossum3b835492008-01-05 00:59:59 +0000175 copy = (char *)PyMem_MALLOC(end - digits_pos +
176 1 + decimal_point_len);
Georg Brandlb569ee42006-05-29 14:28:05 +0000177 if (copy == NULL) {
178 if (endptr)
Georg Brandl80181e22006-05-29 14:33:55 +0000179 *endptr = (char *)nptr;
Georg Brandlb569ee42006-05-29 14:28:05 +0000180 errno = ENOMEM;
181 return val;
182 }
Martin v. Löwis737ea822004-06-08 18:52:54 +0000183
184 c = copy;
Guido van Rossum3b835492008-01-05 00:59:59 +0000185 memcpy(c, digits_pos, decimal_point_pos - digits_pos);
186 c += decimal_point_pos - digits_pos;
Martin v. Löwis737ea822004-06-08 18:52:54 +0000187 memcpy(c, decimal_point, decimal_point_len);
188 c += decimal_point_len;
Eric Smith5c35a9d2008-03-17 12:14:29 +0000189 memcpy(c, decimal_point_pos + 1,
190 end - (decimal_point_pos + 1));
Martin v. Löwis737ea822004-06-08 18:52:54 +0000191 c += end - (decimal_point_pos + 1);
192 *c = 0;
193
194 val = strtod(copy, &fail_pos);
195
196 if (fail_pos)
197 {
198 if (fail_pos > decimal_point_pos)
Guido van Rossum3b835492008-01-05 00:59:59 +0000199 fail_pos = (char *)digits_pos +
200 (fail_pos - copy) -
201 (decimal_point_len - 1);
Martin v. Löwis737ea822004-06-08 18:52:54 +0000202 else
Guido van Rossum3b835492008-01-05 00:59:59 +0000203 fail_pos = (char *)digits_pos +
204 (fail_pos - copy);
Martin v. Löwis737ea822004-06-08 18:52:54 +0000205 }
206
Brett Cannon0ed05872006-05-25 20:44:08 +0000207 PyMem_FREE(copy);
Martin v. Löwis737ea822004-06-08 18:52:54 +0000208
209 }
Neal Norwitze7214a12005-12-18 05:03:17 +0000210 else {
Guido van Rossum3b835492008-01-05 00:59:59 +0000211 val = strtod(digits_pos, &fail_pos);
Neal Norwitze7214a12005-12-18 05:03:17 +0000212 }
Martin v. Löwis737ea822004-06-08 18:52:54 +0000213
Guido van Rossum3b835492008-01-05 00:59:59 +0000214 if (fail_pos == digits_pos)
Mark Dickinson6d6b2202009-04-26 16:04:05 +0000215 goto invalid_string;
Guido van Rossum3b835492008-01-05 00:59:59 +0000216
Mark Dickinson6d6b2202009-04-26 16:04:05 +0000217 got_val:
Guido van Rossum3b835492008-01-05 00:59:59 +0000218 if (negate && fail_pos != nptr)
219 val = -val;
220
Martin v. Löwis737ea822004-06-08 18:52:54 +0000221 if (endptr)
222 *endptr = fail_pos;
223
224 return val;
Mark Dickinson6d6b2202009-04-26 16:04:05 +0000225
226 invalid_string:
227 if (endptr)
228 *endptr = (char*)nptr;
229 errno = EINVAL;
230 return -1.0;
Martin v. Löwis737ea822004-06-08 18:52:54 +0000231}
232
/* Convert the initial portion of nptr to a double by delegating to
   PyOS_ascii_strtod; the end-of-number position is not reported. */
double
PyOS_ascii_atof(const char *nptr)
{
    char **no_endptr = NULL;

    return PyOS_ascii_strtod(nptr, no_endptr);
}
238
239
Eric Smith0a950632008-04-30 01:09:30 +0000240/* Given a string that may have a decimal point in the current
241 locale, change it back to a dot. Since the string cannot get
242 longer, no need for a maximum buffer size parameter. */
243Py_LOCAL_INLINE(void)
244change_decimal_from_locale_to_dot(char* buffer)
245{
246 struct lconv *locale_data = localeconv();
247 const char *decimal_point = locale_data->decimal_point;
248
249 if (decimal_point[0] != '.' || decimal_point[1] != 0) {
250 size_t decimal_point_len = strlen(decimal_point);
251
252 if (*buffer == '+' || *buffer == '-')
253 buffer++;
Eric Smithcac7af62009-04-27 19:04:37 +0000254 while (Py_ISDIGIT(*buffer))
Eric Smith0a950632008-04-30 01:09:30 +0000255 buffer++;
256 if (strncmp(buffer, decimal_point, decimal_point_len) == 0) {
257 *buffer = '.';
258 buffer++;
259 if (decimal_point_len > 1) {
260 /* buffer needs to get smaller */
261 size_t rest_len = strlen(buffer +
262 (decimal_point_len - 1));
263 memmove(buffer,
264 buffer + (decimal_point_len - 1),
265 rest_len);
266 buffer[rest_len] = 0;
267 }
268 }
269 }
270}
271
Martin v. Löwis737ea822004-06-08 18:52:54 +0000272
Eric Smith068f0652009-04-25 21:40:15 +0000273Py_LOCAL_INLINE(void)
274ensure_sign(char* buffer, size_t buf_size)
275{
Eric Smith94cc00c2009-04-28 07:33:09 +0000276 size_t len;
Eric Smith068f0652009-04-25 21:40:15 +0000277
278 if (buffer[0] == '-')
279 /* Already have a sign. */
280 return;
281
282 /* Include the trailing 0 byte. */
283 len = strlen(buffer)+1;
284 if (len >= buf_size+1)
285 /* No room for the sign, don't do anything. */
286 return;
287
288 memmove(buffer+1, buffer, len);
289 buffer[0] = '+';
290}
291
Eric Smith7ef40bf2008-02-20 23:34:22 +0000292/* From the C99 standard, section 7.19.6:
293The exponent always contains at least two digits, and only as many more digits
294as necessary to represent the exponent.
295*/
296#define MIN_EXPONENT_DIGITS 2
297
Eric Smith0a950632008-04-30 01:09:30 +0000298/* Ensure that any exponent, if present, is at least MIN_EXPONENT_DIGITS
299 in length. */
300Py_LOCAL_INLINE(void)
Mark Dickinsone73cbe72009-04-26 19:54:55 +0000301ensure_minimum_exponent_length(char* buffer, size_t buf_size)
Eric Smith0a950632008-04-30 01:09:30 +0000302{
303 char *p = strpbrk(buffer, "eE");
304 if (p && (*(p + 1) == '-' || *(p + 1) == '+')) {
305 char *start = p + 2;
306 int exponent_digit_cnt = 0;
307 int leading_zero_cnt = 0;
308 int in_leading_zeros = 1;
309 int significant_digit_cnt;
310
311 /* Skip over the exponent and the sign. */
312 p += 2;
313
314 /* Find the end of the exponent, keeping track of leading
315 zeros. */
Eric Smithcac7af62009-04-27 19:04:37 +0000316 while (*p && Py_ISDIGIT(*p)) {
Eric Smith0a950632008-04-30 01:09:30 +0000317 if (in_leading_zeros && *p == '0')
318 ++leading_zero_cnt;
319 if (*p != '0')
320 in_leading_zeros = 0;
321 ++p;
322 ++exponent_digit_cnt;
323 }
324
325 significant_digit_cnt = exponent_digit_cnt - leading_zero_cnt;
326 if (exponent_digit_cnt == MIN_EXPONENT_DIGITS) {
327 /* If there are 2 exactly digits, we're done,
328 regardless of what they contain */
329 }
330 else if (exponent_digit_cnt > MIN_EXPONENT_DIGITS) {
331 int extra_zeros_cnt;
332
333 /* There are more than 2 digits in the exponent. See
334 if we can delete some of the leading zeros */
335 if (significant_digit_cnt < MIN_EXPONENT_DIGITS)
336 significant_digit_cnt = MIN_EXPONENT_DIGITS;
337 extra_zeros_cnt = exponent_digit_cnt -
338 significant_digit_cnt;
339
340 /* Delete extra_zeros_cnt worth of characters from the
341 front of the exponent */
342 assert(extra_zeros_cnt >= 0);
343
344 /* Add one to significant_digit_cnt to copy the
345 trailing 0 byte, thus setting the length */
346 memmove(start,
347 start + extra_zeros_cnt,
348 significant_digit_cnt + 1);
349 }
350 else {
351 /* If there are fewer than 2 digits, add zeros
352 until there are 2, if there's enough room */
353 int zeros = MIN_EXPONENT_DIGITS - exponent_digit_cnt;
354 if (start + zeros + exponent_digit_cnt + 1
355 < buffer + buf_size) {
356 memmove(start + zeros, start,
357 exponent_digit_cnt + 1);
358 memset(start, '0', zeros);
359 }
360 }
361 }
362}
363
Mark Dickinson92fcc9c2009-04-29 20:41:00 +0000364/* Remove trailing zeros after the decimal point from a numeric string; also
365 remove the decimal point if all digits following it are zero. The numeric
366 string must end in '\0', and should not have any leading or trailing
367 whitespace. Assumes that the decimal point is '.'. */
Eric Smith0a950632008-04-30 01:09:30 +0000368Py_LOCAL_INLINE(void)
Mark Dickinson92fcc9c2009-04-29 20:41:00 +0000369remove_trailing_zeros(char *buffer)
Eric Smith0a950632008-04-30 01:09:30 +0000370{
Mark Dickinson92fcc9c2009-04-29 20:41:00 +0000371 char *old_fraction_end, *new_fraction_end, *end, *p;
372
373 p = buffer;
374 if (*p == '-' || *p == '+')
375 /* Skip leading sign, if present */
376 ++p;
377 while (Py_ISDIGIT(*p))
378 ++p;
379
380 /* if there's no decimal point there's nothing to do */
381 if (*p++ != '.')
382 return;
383
384 /* scan any digits after the point */
385 while (Py_ISDIGIT(*p))
386 ++p;
387 old_fraction_end = p;
388
389 /* scan up to ending '\0' */
390 while (*p != '\0')
391 p++;
392 /* +1 to make sure that we move the null byte as well */
393 end = p+1;
394
395 /* scan back from fraction_end, looking for removable zeros */
396 p = old_fraction_end;
397 while (*(p-1) == '0')
398 --p;
399 /* and remove point if we've got that far */
400 if (*(p-1) == '.')
401 --p;
402 new_fraction_end = p;
403
404 memmove(new_fraction_end, old_fraction_end, end-old_fraction_end);
405}
406
407/* Ensure that buffer has a decimal point in it. The decimal point will not
408 be in the current locale, it will always be '.'. Don't add a decimal point
409 if an exponent is present. Also, convert to exponential notation where
410 adding a '.0' would produce too many significant digits (see issue 5864).
411
412 Returns a pointer to the fixed buffer, or NULL on failure.
413*/
414Py_LOCAL_INLINE(char *)
415ensure_decimal_point(char* buffer, size_t buf_size, int precision)
416{
417 int digit_count, insert_count = 0, convert_to_exp = 0;
418 char* chars_to_insert, *digits_start;
Eric Smith0a950632008-04-30 01:09:30 +0000419
420 /* search for the first non-digit character */
421 char *p = buffer;
Eric Smithf032a002008-07-19 00:24:05 +0000422 if (*p == '-' || *p == '+')
423 /* Skip leading sign, if present. I think this could only
424 ever be '-', but it can't hurt to check for both. */
425 ++p;
Mark Dickinson92fcc9c2009-04-29 20:41:00 +0000426 digits_start = p;
Eric Smithcac7af62009-04-27 19:04:37 +0000427 while (*p && Py_ISDIGIT(*p))
Eric Smith0a950632008-04-30 01:09:30 +0000428 ++p;
Mark Dickinson92fcc9c2009-04-29 20:41:00 +0000429 digit_count = Py_SAFE_DOWNCAST(p - digits_start, Py_ssize_t, int);
Eric Smith0a950632008-04-30 01:09:30 +0000430
431 if (*p == '.') {
Eric Smithcac7af62009-04-27 19:04:37 +0000432 if (Py_ISDIGIT(*(p+1))) {
Eric Smith0a950632008-04-30 01:09:30 +0000433 /* Nothing to do, we already have a decimal
434 point and a digit after it */
435 }
436 else {
437 /* We have a decimal point, but no following
438 digit. Insert a zero after the decimal. */
Mark Dickinson92fcc9c2009-04-29 20:41:00 +0000439 /* can't ever get here via PyOS_double_to_string */
440 assert(precision == -1);
Eric Smith0a950632008-04-30 01:09:30 +0000441 ++p;
442 chars_to_insert = "0";
443 insert_count = 1;
444 }
445 }
Eric Smithaca19e62009-04-22 13:29:05 +0000446 else if (!(*p == 'e' || *p == 'E')) {
447 /* Don't add ".0" if we have an exponent. */
Mark Dickinson92fcc9c2009-04-29 20:41:00 +0000448 if (digit_count == precision) {
449 /* issue 5864: don't add a trailing .0 in the case
450 where the '%g'-formatted result already has as many
451 significant digits as were requested. Switch to
452 exponential notation instead. */
453 convert_to_exp = 1;
454 /* no exponent, no point, and we shouldn't land here
455 for infs and nans, so we must be at the end of the
456 string. */
457 assert(*p == '\0');
458 }
459 else {
460 assert(precision == -1 || digit_count < precision);
461 chars_to_insert = ".0";
462 insert_count = 2;
463 }
Eric Smith0a950632008-04-30 01:09:30 +0000464 }
465 if (insert_count) {
466 size_t buf_len = strlen(buffer);
467 if (buf_len + insert_count + 1 >= buf_size) {
468 /* If there is not enough room in the buffer
469 for the additional text, just skip it. It's
470 not worth generating an error over. */
471 }
472 else {
473 memmove(p + insert_count, p,
474 buffer + strlen(buffer) - p + 1);
475 memcpy(p, chars_to_insert, insert_count);
476 }
477 }
Mark Dickinson92fcc9c2009-04-29 20:41:00 +0000478 if (convert_to_exp) {
479 int written;
480 size_t buf_avail;
481 p = digits_start;
482 /* insert decimal point */
483 assert(digit_count >= 1);
484 memmove(p+2, p+1, digit_count); /* safe, but overwrites nul */
485 p[1] = '.';
486 p += digit_count+1;
487 assert(p <= buf_size+buffer);
488 buf_avail = buf_size+buffer-p;
489 if (buf_avail == 0)
490 return NULL;
491 /* Add exponent. It's okay to use lower case 'e': we only
492 arrive here as a result of using the empty format code or
493 repr/str builtins and those never want an upper case 'E' */
494 written = PyOS_snprintf(p, buf_avail, "e%+.02d", digit_count-1);
495 if (!(0 <= written &&
496 written < Py_SAFE_DOWNCAST(buf_avail, size_t, int)))
497 /* output truncated, or something else bad happened */
498 return NULL;
499 remove_trailing_zeros(buffer);
500 }
501 return buffer;
Eric Smith0a950632008-04-30 01:09:30 +0000502}
503
Eric Smith7ef40bf2008-02-20 23:34:22 +0000504/* see FORMATBUFLEN in unicodeobject.c */
505#define FLOAT_FORMATBUFLEN 120
506
Martin v. Löwis737ea822004-06-08 18:52:54 +0000507/**
Eric Smith068f0652009-04-25 21:40:15 +0000508 * _PyOS_ascii_formatd:
Martin v. Löwis737ea822004-06-08 18:52:54 +0000509 * @buffer: A buffer to place the resulting string in
Eric Smith8113ca62008-03-17 11:01:01 +0000510 * @buf_size: The length of the buffer.
Martin v. Löwis737ea822004-06-08 18:52:54 +0000511 * @format: The printf()-style format to use for the
512 * code to use for converting.
513 * @d: The #gdouble to convert
514 *
515 * Converts a #gdouble to a string, using the '.' as
516 * decimal point. To format the number you pass in
517 * a printf()-style format string. Allowed conversion
Eric Smithaca19e62009-04-22 13:29:05 +0000518 * specifiers are 'e', 'E', 'f', 'F', 'g', 'G', and 'Z'.
Martin v. Löwis737ea822004-06-08 18:52:54 +0000519 *
Eric Smith8113ca62008-03-17 11:01:01 +0000520 * 'Z' is the same as 'g', except it always has a decimal and
521 * at least one digit after the decimal.
Eric Smith7ef40bf2008-02-20 23:34:22 +0000522 *
Martin v. Löwis737ea822004-06-08 18:52:54 +0000523 * Return value: The pointer to the buffer with the converted string.
Mark Dickinson92fcc9c2009-04-29 20:41:00 +0000524 * On failure returns NULL but does not set any Python exception.
Martin v. Löwis737ea822004-06-08 18:52:54 +0000525 **/
Eric Smith068f0652009-04-25 21:40:15 +0000526/* DEPRECATED, will be deleted in 2.8 and 3.2 */
527PyAPI_FUNC(char *)
Martin v. Löwis737ea822004-06-08 18:52:54 +0000528PyOS_ascii_formatd(char *buffer,
Eric Smith8113ca62008-03-17 11:01:01 +0000529 size_t buf_size,
Martin v. Löwis737ea822004-06-08 18:52:54 +0000530 const char *format,
531 double d)
532{
Martin v. Löwis737ea822004-06-08 18:52:54 +0000533 char format_char;
Eric Smith7ef40bf2008-02-20 23:34:22 +0000534 size_t format_len = strlen(format);
535
Eric Smith8113ca62008-03-17 11:01:01 +0000536 /* Issue 2264: code 'Z' requires copying the format. 'Z' is 'g', but
537 also with at least one character past the decimal. */
Eric Smith7ef40bf2008-02-20 23:34:22 +0000538 char tmp_format[FLOAT_FORMATBUFLEN];
Martin v. Löwis737ea822004-06-08 18:52:54 +0000539
Eric Smith068f0652009-04-25 21:40:15 +0000540 if (PyErr_WarnEx(PyExc_DeprecationWarning,
541 "PyOS_ascii_formatd is deprecated, "
542 "use PyOS_double_to_string instead", 1) < 0)
543 return NULL;
544
Eric Smith7ef40bf2008-02-20 23:34:22 +0000545 /* The last character in the format string must be the format char */
546 format_char = format[format_len - 1];
Martin v. Löwis737ea822004-06-08 18:52:54 +0000547
Martin v. Löwis737ea822004-06-08 18:52:54 +0000548 if (format[0] != '%')
549 return NULL;
550
Eric Smith7ef40bf2008-02-20 23:34:22 +0000551 /* I'm not sure why this test is here. It's ensuring that the format
552 string after the first character doesn't have a single quote, a
553 lowercase l, or a percent. This is the reverse of the commented-out
554 test about 10 lines ago. */
Martin v. Löwis737ea822004-06-08 18:52:54 +0000555 if (strpbrk(format + 1, "'l%"))
556 return NULL;
557
Eric Smith8113ca62008-03-17 11:01:01 +0000558 /* Also curious about this function is that it accepts format strings
559 like "%xg", which are invalid for floats. In general, the
560 interface to this function is not very good, but changing it is
561 difficult because it's a public API. */
562
Martin v. Löwis737ea822004-06-08 18:52:54 +0000563 if (!(format_char == 'e' || format_char == 'E' ||
564 format_char == 'f' || format_char == 'F' ||
Eric Smith7ef40bf2008-02-20 23:34:22 +0000565 format_char == 'g' || format_char == 'G' ||
Eric Smithaca19e62009-04-22 13:29:05 +0000566 format_char == 'Z'))
Martin v. Löwis737ea822004-06-08 18:52:54 +0000567 return NULL;
568
Eric Smithaca19e62009-04-22 13:29:05 +0000569 /* Map 'Z' format_char to 'g', by copying the format string and
Eric Smith8113ca62008-03-17 11:01:01 +0000570 replacing the final char with a 'g' */
Eric Smithaca19e62009-04-22 13:29:05 +0000571 if (format_char == 'Z') {
Eric Smith7ef40bf2008-02-20 23:34:22 +0000572 if (format_len + 1 >= sizeof(tmp_format)) {
573 /* The format won't fit in our copy. Error out. In
Eric Smith5c35a9d2008-03-17 12:14:29 +0000574 practice, this will never happen and will be
575 detected by returning NULL */
Eric Smith7ef40bf2008-02-20 23:34:22 +0000576 return NULL;
577 }
578 strcpy(tmp_format, format);
579 tmp_format[format_len - 1] = 'g';
580 format = tmp_format;
581 }
Martin v. Löwis737ea822004-06-08 18:52:54 +0000582
Eric Smith8113ca62008-03-17 11:01:01 +0000583
Eric Smith7ef40bf2008-02-20 23:34:22 +0000584 /* Have PyOS_snprintf do the hard work */
Eric Smith8113ca62008-03-17 11:01:01 +0000585 PyOS_snprintf(buffer, buf_size, format, d);
Martin v. Löwis737ea822004-06-08 18:52:54 +0000586
Eric Smith0a950632008-04-30 01:09:30 +0000587 /* Do various fixups on the return string */
Martin v. Löwis737ea822004-06-08 18:52:54 +0000588
Eric Smith0a950632008-04-30 01:09:30 +0000589 /* Get the current locale, and find the decimal point string.
Eric Smithaca19e62009-04-22 13:29:05 +0000590 Convert that string back to a dot. */
591 change_decimal_from_locale_to_dot(buffer);
Eric Smith7ef40bf2008-02-20 23:34:22 +0000592
593 /* If an exponent exists, ensure that the exponent is at least
594 MIN_EXPONENT_DIGITS digits, providing the buffer is large enough
595 for the extra zeros. Also, if there are more than
596 MIN_EXPONENT_DIGITS, remove as many zeros as possible until we get
597 back to MIN_EXPONENT_DIGITS */
Mark Dickinsone73cbe72009-04-26 19:54:55 +0000598 ensure_minimum_exponent_length(buffer, buf_size);
Martin v. Löwis737ea822004-06-08 18:52:54 +0000599
Eric Smith8113ca62008-03-17 11:01:01 +0000600 /* If format_char is 'Z', make sure we have at least one character
Mark Dickinson92fcc9c2009-04-29 20:41:00 +0000601 after the decimal point (and make sure we have a decimal point);
602 also switch to exponential notation in some edge cases where the
603 extra character would produce more significant digits that we
604 really want. */
Eric Smith0a950632008-04-30 01:09:30 +0000605 if (format_char == 'Z')
Mark Dickinson92fcc9c2009-04-29 20:41:00 +0000606 buffer = ensure_decimal_point(buffer, buf_size, -1);
Eric Smith8113ca62008-03-17 11:01:01 +0000607
Martin v. Löwis737ea822004-06-08 18:52:54 +0000608 return buffer;
609}
610
Mark Dickinsondf108ca2009-04-29 21:56:53 +0000611/* Precisions used by repr() and str(), respectively.
612
613 The repr() precision (17 significant decimal digits) is the minimal number
614 that is guaranteed to have enough precision so that if the number is read
615 back in the exact same binary value is recreated. This is true for IEEE
616 floating point by design, and also happens to work for all other modern
617 hardware.
618
619 The str() precision (12 significant decimal digits) is chosen so that in
620 most cases, the rounding noise created by various operations is suppressed,
621 while giving plenty of precision for practical use.
622
623*/
624
Eric Smith068f0652009-04-25 21:40:15 +0000625PyAPI_FUNC(void)
626_PyOS_double_to_string(char *buf, size_t buf_len, double val,
627 char format_code, int precision,
628 int flags, int *ptype)
Martin v. Löwis737ea822004-06-08 18:52:54 +0000629{
Eric Smithaca19e62009-04-22 13:29:05 +0000630 char format[32];
Eric Smithaca19e62009-04-22 13:29:05 +0000631 int t;
632 int upper = 0;
633
Eric Smith068f0652009-04-25 21:40:15 +0000634 if (buf_len < 1) {
635 assert(0);
636 /* There's no way to signal this error. Just return. */
637 return;
638 }
639 buf[0] = 0;
640
Eric Smithaca19e62009-04-22 13:29:05 +0000641 /* Validate format_code, and map upper and lower case */
642 switch (format_code) {
643 case 'e': /* exponent */
644 case 'f': /* fixed */
645 case 'g': /* general */
646 break;
647 case 'E':
648 upper = 1;
649 format_code = 'e';
650 break;
651 case 'F':
652 upper = 1;
653 format_code = 'f';
654 break;
655 case 'G':
656 upper = 1;
657 format_code = 'g';
658 break;
659 case 'r': /* repr format */
660 /* Supplied precision is unused, must be 0. */
Eric Smith068f0652009-04-25 21:40:15 +0000661 if (precision != 0)
662 return;
Eric Smitha985a3a2009-05-05 18:26:08 +0000663 /* The repr() precision (17 significant decimal digits) is the
664 minimal number that is guaranteed to have enough precision
665 so that if the number is read back in the exact same binary
666 value is recreated. This is true for IEEE floating point
667 by design, and also happens to work for all other modern
668 hardware. */
Eric Smithaca19e62009-04-22 13:29:05 +0000669 precision = 17;
670 format_code = 'g';
671 break;
Eric Smithaca19e62009-04-22 13:29:05 +0000672 default:
Eric Smith068f0652009-04-25 21:40:15 +0000673 assert(0);
674 return;
675 }
676
677 /* Check for buf too small to fit "-inf". Other buffer too small
678 conditions are dealt with when converting or formatting finite
679 numbers. */
680 if (buf_len < 5) {
681 assert(0);
682 return;
Eric Smithaca19e62009-04-22 13:29:05 +0000683 }
684
685 /* Handle nan and inf. */
686 if (Py_IS_NAN(val)) {
687 strcpy(buf, "nan");
688 t = Py_DTST_NAN;
689 } else if (Py_IS_INFINITY(val)) {
690 if (copysign(1., val) == 1.)
691 strcpy(buf, "inf");
692 else
693 strcpy(buf, "-inf");
694 t = Py_DTST_INFINITE;
695 } else {
696 t = Py_DTST_FINITE;
697
Eric Smith068f0652009-04-25 21:40:15 +0000698 /* Build the format string. */
699 PyOS_snprintf(format, sizeof(format), "%%%s.%i%c",
700 (flags & Py_DTSF_ALT ? "#" : ""), precision,
701 format_code);
Eric Smithaca19e62009-04-22 13:29:05 +0000702
Eric Smith068f0652009-04-25 21:40:15 +0000703 /* Have PyOS_snprintf do the hard work. */
704 PyOS_snprintf(buf, buf_len, format, val);
705
706 /* Do various fixups on the return string */
707
708 /* Get the current locale, and find the decimal point string.
709 Convert that string back to a dot. */
710 change_decimal_from_locale_to_dot(buf);
711
712 /* If an exponent exists, ensure that the exponent is at least
713 MIN_EXPONENT_DIGITS digits, providing the buffer is large
714 enough for the extra zeros. Also, if there are more than
715 MIN_EXPONENT_DIGITS, remove as many zeros as possible until
716 we get back to MIN_EXPONENT_DIGITS */
Mark Dickinsone73cbe72009-04-26 19:54:55 +0000717 ensure_minimum_exponent_length(buf, buf_len);
Eric Smith068f0652009-04-25 21:40:15 +0000718
719 /* Possibly make sure we have at least one character after the
720 decimal point (and make sure we have a decimal point). */
Eric Smithaca19e62009-04-22 13:29:05 +0000721 if (flags & Py_DTSF_ADD_DOT_0)
Mark Dickinson92fcc9c2009-04-29 20:41:00 +0000722 buf = ensure_decimal_point(buf, buf_len, precision);
Eric Smithaca19e62009-04-22 13:29:05 +0000723 }
724
Eric Smith068f0652009-04-25 21:40:15 +0000725 /* Add the sign if asked and the result isn't negative. */
726 if (flags & Py_DTSF_SIGN && buf[0] != '-')
727 ensure_sign(buf, buf_len);
Eric Smithaca19e62009-04-22 13:29:05 +0000728
Eric Smith068f0652009-04-25 21:40:15 +0000729 if (upper) {
730 /* Convert to upper case. */
731 char *p;
732 for (p = buf; *p; p++)
Mark Dickinson777e4ff2009-05-03 20:59:48 +0000733 *p = Py_TOUPPER(*p);
Eric Smith068f0652009-04-25 21:40:15 +0000734 }
735
736 if (ptype)
737 *ptype = t;
738}
739
740
741PyAPI_FUNC(char *) PyOS_double_to_string(double val,
742 char format_code,
743 int precision,
744 int flags,
745 int *ptype)
746{
747 char buf[128];
748 Py_ssize_t len;
749 char *result;
750
751 _PyOS_double_to_string(buf, sizeof(buf), val, format_code, precision,
752 flags, ptype);
753 len = strlen(buf);
754 if (len == 0) {
755 PyErr_BadInternalCall();
756 return NULL;
757 }
758
759 /* Add 1 for the trailing 0 byte. */
760 result = PyMem_Malloc(len + 1);
Eric Smithaca19e62009-04-22 13:29:05 +0000761 if (result == NULL) {
762 PyErr_NoMemory();
763 return NULL;
764 }
Eric Smith068f0652009-04-25 21:40:15 +0000765 strcpy(result, buf);
Eric Smithaca19e62009-04-22 13:29:05 +0000766
Eric Smithaca19e62009-04-22 13:29:05 +0000767 return result;
Martin v. Löwis737ea822004-06-08 18:52:54 +0000768}