blob: 1c8202c776188489ac835c6d0eab57a7e8d06286 [file] [log] [blame]
Martin v. Löwis737ea822004-06-08 18:52:54 +00001/* -*- Mode: C; c-file-style: "python" -*- */
2
3#include <Python.h>
Victor Stinnere9e7d282020-02-12 22:54:42 +01004#include "pycore_dtoa.h"
Martin v. Löwis737ea822004-06-08 18:52:54 +00005#include <locale.h>
6
/* Case-insensitive prefix match used for nan and inf detection.  t is the
   pattern and must already be lower-case; each character of s is lowered
   with Py_TOLOWER before being compared.  Returns 1 if all of t matches the
   start of s, 0 otherwise. */

static int
case_insensitive_match(const char *s, const char *t)
{
    /* Walk both strings in lock step until the pattern is exhausted. */
    for (; *t != '\0'; s++, t++) {
        if (Py_TOLOWER(*s) != *t) {
            return 0;
        }
    }
    return 1;
}
19
/* _Py_parse_inf_or_nan: Attempt to parse a string of the form "nan", "inf" or
   "infinity", with an optional leading sign of "+" or "-".  On success,
   return the NaN or Infinity as a double and set *endptr to point just beyond
   the successfully parsed portion of the string.  On failure, return -1.0 and
   set *endptr to point to the start of the string. */
25
Mark Dickinsone383e822012-04-29 15:31:56 +010026#ifndef PY_NO_SHORT_FLOAT_REPR
27
/* Variant used when the dtoa helpers are available: the special values are
   produced by _Py_dg_infinity() and _Py_dg_stdnan(), each of which is given
   the sign flag. */
double
_Py_parse_inf_or_nan(const char *p, char **endptr)
{
    const char *cursor = p;
    int negate = 0;

    /* Consume an optional sign. */
    switch (*cursor) {
    case '-':
        negate = 1;
        cursor++;
        break;
    case '+':
        cursor++;
        break;
    default:
        break;
    }

    if (case_insensitive_match(cursor, "inf")) {
        /* Accept the long spelling "infinity" as well. */
        cursor += 3;
        if (case_insensitive_match(cursor, "inity")) {
            cursor += 5;
        }
        *endptr = (char *)cursor;
        return _Py_dg_infinity(negate);
    }

    if (case_insensitive_match(cursor, "nan")) {
        *endptr = (char *)(cursor + 3);
        return _Py_dg_stdnan(negate);
    }

    /* Nothing recognised: report failure without consuming any input. */
    *endptr = (char *)p;
    return -1.0;
}
60
61#else
62
Mark Dickinsonbd16edd2009-05-20 22:05:25 +000063double
64_Py_parse_inf_or_nan(const char *p, char **endptr)
65{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000066 double retval;
67 const char *s;
68 int negate = 0;
Mark Dickinsonbd16edd2009-05-20 22:05:25 +000069
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000070 s = p;
71 if (*s == '-') {
72 negate = 1;
73 s++;
74 }
75 else if (*s == '+') {
76 s++;
77 }
78 if (case_insensitive_match(s, "inf")) {
79 s += 3;
80 if (case_insensitive_match(s, "inity"))
81 s += 5;
82 retval = negate ? -Py_HUGE_VAL : Py_HUGE_VAL;
83 }
Mark Dickinsonbd16edd2009-05-20 22:05:25 +000084#ifdef Py_NAN
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000085 else if (case_insensitive_match(s, "nan")) {
86 s += 3;
87 retval = negate ? -Py_NAN : Py_NAN;
88 }
Mark Dickinsonbd16edd2009-05-20 22:05:25 +000089#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000090 else {
91 s = p;
92 retval = -1.0;
93 }
94 *endptr = (char *)s;
95 return retval;
Mark Dickinsonbd16edd2009-05-20 22:05:25 +000096}
97
Mark Dickinsone383e822012-04-29 15:31:56 +010098#endif
99
/**
 * _PyOS_ascii_strtod:
 * @nptr:    the string to convert to a numeric value.
 * @endptr:  if non-%NULL, it returns the character after
 *           the last character used in the conversion.
 *
 * Converts a string to a double value.
 * This function behaves like the standard strtod() function
 * does in the C locale. It does this without actually
 * changing the current locale, since that would not be
 * thread-safe.
 *
 * This function is typically used when reading configuration
 * files or other non-user input that should be locale independent.
 * To handle input from the user you should normally use the
 * locale-sensitive system strtod() function.
 *
 * If the correct value would cause overflow, plus or minus %HUGE_VAL
 * is returned (according to the sign of the value), and %ERANGE is
 * stored in %errno. If the correct value would cause underflow,
 * zero is returned and %ERANGE is stored in %errno.
 * If memory allocation fails, %ENOMEM is stored in %errno.
 *
 * This function resets %errno before calling strtod() so that
 * you can reliably detect overflow and underflow.
 *
 * Return value: the double value.
 **/
Eric Smith0923d1d2009-04-16 20:16:10 +0000128
129#ifndef PY_NO_SHORT_FLOAT_REPR
130
Eric Smith68af50b2010-02-22 14:58:30 +0000131static double
Mark Dickinson725bfd82009-05-03 20:33:40 +0000132_PyOS_ascii_strtod(const char *nptr, char **endptr)
Eric Smith0923d1d2009-04-16 20:16:10 +0000133{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000134 double result;
135 _Py_SET_53BIT_PRECISION_HEADER;
Eric Smith0923d1d2009-04-16 20:16:10 +0000136
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000137 assert(nptr != NULL);
138 /* Set errno to zero, so that we can distinguish zero results
139 and underflows */
140 errno = 0;
Eric Smith0923d1d2009-04-16 20:16:10 +0000141
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000142 _Py_SET_53BIT_PRECISION_START;
143 result = _Py_dg_strtod(nptr, endptr);
144 _Py_SET_53BIT_PRECISION_END;
Eric Smith0923d1d2009-04-16 20:16:10 +0000145
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000146 if (*endptr == nptr)
147 /* string might represent an inf or nan */
148 result = _Py_parse_inf_or_nan(nptr, endptr);
Mark Dickinsonbd16edd2009-05-20 22:05:25 +0000149
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000150 return result;
Eric Smith0923d1d2009-04-16 20:16:10 +0000151
152}
153
154#else
155
/*
   Use system strtod; since strtod is locale aware, we may
   have to first fix the decimal separator.

   Note that unlike _Py_dg_strtod, the system strtod may not always give
   correctly rounded results.

   On success the parsed value is returned and *endptr points just past the
   consumed text.  If the text is not a valid number, *endptr is set to nptr,
   errno to EINVAL and -1.0 is returned.  If the temporary copy cannot be
   allocated, *endptr is set to nptr, errno to ENOMEM and the failure value
   left behind by _Py_parse_inf_or_nan is returned.
*/

static double
_PyOS_ascii_strtod(const char *nptr, char **endptr)
{
    char *fail_pos;
    double val;
    struct lconv *locale_data;
    const char *decimal_point;
    size_t decimal_point_len;
    const char *p, *decimal_point_pos;
    const char *end = NULL; /* Silence gcc */
    const char *digits_pos = NULL;
    int negate = 0;

    assert(nptr != NULL);

    fail_pos = NULL;

    locale_data = localeconv();
    decimal_point = locale_data->decimal_point;
    decimal_point_len = strlen(decimal_point);

    assert(decimal_point_len != 0);

    decimal_point_pos = NULL;

    /* Parse infinities and nans */
    val = _Py_parse_inf_or_nan(nptr, endptr);
    if (*endptr != nptr)
        return val;

    /* Set errno to zero, so that we can distinguish zero results
       and underflows */
    errno = 0;

    /* We process the optional sign manually, then pass the remainder to
       the system strtod.  This ensures that the result of an underflow
       has the correct sign. (bug #1725) */
    p = nptr;
    /* Process leading sign, if present */
    if (*p == '-') {
        negate = 1;
        p++;
    }
    else if (*p == '+') {
        p++;
    }

    /* Some platform strtods accept hex floats; Python shouldn't (at the
       moment), so we check explicitly for strings starting with '0x'. */
    if (*p == '0' && (*(p+1) == 'x' || *(p+1) == 'X'))
        goto invalid_string;

    /* Check that what's left begins with a digit or decimal point */
    if (!Py_ISDIGIT(*p) && *p != '.')
        goto invalid_string;

    digits_pos = p;
    if (decimal_point[0] != '.' ||
        decimal_point[1] != 0)
    {
        /* Look for a '.' in the input; if present, it'll need to be
           swapped for the current locale's decimal point before we
           call strtod.  On the other hand, if we find the current
           locale's decimal point then the input is invalid. */
        while (Py_ISDIGIT(*p))
            p++;

        if (*p == '.')
        {
            decimal_point_pos = p++;

            /* locate end of number */
            while (Py_ISDIGIT(*p))
                p++;

            if (*p == 'e' || *p == 'E')
                p++;
            if (*p == '+' || *p == '-')
                p++;
            while (Py_ISDIGIT(*p))
                p++;
            end = p;
        }
        else if (strncmp(p, decimal_point, decimal_point_len) == 0)
            /* Python bug #1417699 */
            goto invalid_string;
        /* For the other cases, we need not convert the decimal
           point */
    }

    if (decimal_point_pos) {
        char *copy, *c;
        /* Create a copy of the input, with the '.' converted to the
           locale-specific decimal point */
        copy = (char *)PyMem_MALLOC(end - digits_pos +
                                    1 + decimal_point_len);
        if (copy == NULL) {
            *endptr = (char *)nptr;
            errno = ENOMEM;
            return val;
        }

        c = copy;
        memcpy(c, digits_pos, decimal_point_pos - digits_pos);
        c += decimal_point_pos - digits_pos;
        memcpy(c, decimal_point, decimal_point_len);
        c += decimal_point_len;
        memcpy(c, decimal_point_pos + 1,
               end - (decimal_point_pos + 1));
        c += end - (decimal_point_pos + 1);
        *c = 0;

        val = strtod(copy, &fail_pos);

        if (fail_pos)
        {
            /* fail_pos points into copy, while decimal_point_pos points
               into the caller's string, so the two must be compared as
               offsets from the start of their own buffers, never as raw
               pointers.  If strtod stopped after the decimal point, undo
               the extra (decimal_point_len - 1) bytes the locale's
               separator added to the copy. */
            if (fail_pos - copy > decimal_point_pos - digits_pos)
                fail_pos = (char *)digits_pos +
                    (fail_pos - copy) -
                    (decimal_point_len - 1);
            else
                fail_pos = (char *)digits_pos +
                    (fail_pos - copy);
        }

        PyMem_FREE(copy);

    }
    else {
        val = strtod(digits_pos, &fail_pos);
    }

    /* strtod consumed nothing after the optional sign */
    if (fail_pos == digits_pos)
        goto invalid_string;

    if (negate && fail_pos != nptr)
        val = -val;
    *endptr = fail_pos;

    return val;

  invalid_string:
    *endptr = (char*)nptr;
    errno = EINVAL;
    return -1.0;
}
310
Eric Smith0923d1d2009-04-16 20:16:10 +0000311#endif
312
Eric Smith68af50b2010-02-22 14:58:30 +0000313/* PyOS_string_to_double converts a null-terminated byte string s (interpreted
314 as a string of ASCII characters) to a float. The string should not have
315 leading or trailing whitespace. The conversion is independent of the
316 current locale.
Mark Dickinson725bfd82009-05-03 20:33:40 +0000317
318 If endptr is NULL, try to convert the whole string. Raise ValueError and
319 return -1.0 if the string is not a valid representation of a floating-point
320 number.
321
322 If endptr is non-NULL, try to convert as much of the string as possible.
323 If no initial segment of the string is the valid representation of a
324 floating-point number then *endptr is set to point to the beginning of the
325 string, -1.0 is returned and again ValueError is raised.
326
327 On overflow (e.g., when trying to convert '1e500' on an IEEE 754 machine),
328 if overflow_exception is NULL then +-Py_HUGE_VAL is returned, and no Python
Serhiy Storchaka56a6d852014-12-01 18:28:43 +0200329 exception is raised. Otherwise, overflow_exception should point to
Mark Dickinson725bfd82009-05-03 20:33:40 +0000330 a Python exception, this exception will be raised, -1.0 will be returned,
331 and *endptr will point just past the end of the converted value.
332
333 If any other failure occurs (for example lack of memory), -1.0 is returned
334 and the appropriate Python exception will have been set.
335*/
336
337double
338PyOS_string_to_double(const char *s,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000339 char **endptr,
340 PyObject *overflow_exception)
Mark Dickinson725bfd82009-05-03 20:33:40 +0000341{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000342 double x, result=-1.0;
343 char *fail_pos;
Mark Dickinson725bfd82009-05-03 20:33:40 +0000344
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000345 errno = 0;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000346 x = _PyOS_ascii_strtod(s, &fail_pos);
Mark Dickinson725bfd82009-05-03 20:33:40 +0000347
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000348 if (errno == ENOMEM) {
349 PyErr_NoMemory();
350 fail_pos = (char *)s;
351 }
352 else if (!endptr && (fail_pos == s || *fail_pos != '\0'))
353 PyErr_Format(PyExc_ValueError,
354 "could not convert string to float: "
Pedro Lacerda4fa75042019-05-17 19:32:44 -0300355 "'%.200s'", s);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000356 else if (fail_pos == s)
357 PyErr_Format(PyExc_ValueError,
358 "could not convert string to float: "
Pedro Lacerda4fa75042019-05-17 19:32:44 -0300359 "'%.200s'", s);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000360 else if (errno == ERANGE && fabs(x) >= 1.0 && overflow_exception)
361 PyErr_Format(overflow_exception,
362 "value too large to convert to float: "
Pedro Lacerda4fa75042019-05-17 19:32:44 -0300363 "'%.200s'", s);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000364 else
365 result = x;
Mark Dickinson725bfd82009-05-03 20:33:40 +0000366
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000367 if (endptr != NULL)
368 *endptr = fail_pos;
369 return result;
Mark Dickinson725bfd82009-05-03 20:33:40 +0000370}
Eric Smith0923d1d2009-04-16 20:16:10 +0000371
Brett Cannona721aba2016-09-09 14:57:09 -0700372/* Remove underscores that follow the underscore placement rule from
373 the string and then call the `innerfunc` function on the result.
374 It should return a new object or NULL on exception.
375
376 `what` is used for the error message emitted when underscores are detected
377 that don't follow the rule. `arg` is an opaque pointer passed to the inner
378 function.
379
380 This is used to implement underscore-agnostic conversion for floats
381 and complex numbers.
382*/
383PyObject *
384_Py_string_to_number_with_underscores(
385 const char *s, Py_ssize_t orig_len, const char *what, PyObject *obj, void *arg,
386 PyObject *(*innerfunc)(const char *, Py_ssize_t, void *))
387{
388 char prev;
389 const char *p, *last;
390 char *dup, *end;
391 PyObject *result;
392
INADA Naoki16dfca42018-07-14 12:06:43 +0900393 assert(s[orig_len] == '\0');
394
Brett Cannona721aba2016-09-09 14:57:09 -0700395 if (strchr(s, '_') == NULL) {
396 return innerfunc(s, orig_len, arg);
397 }
398
399 dup = PyMem_Malloc(orig_len + 1);
Zackery Spytz4c49da02018-12-07 03:11:30 -0700400 if (dup == NULL) {
401 return PyErr_NoMemory();
402 }
Brett Cannona721aba2016-09-09 14:57:09 -0700403 end = dup;
404 prev = '\0';
405 last = s + orig_len;
406 for (p = s; *p; p++) {
407 if (*p == '_') {
408 /* Underscores are only allowed after digits. */
409 if (!(prev >= '0' && prev <= '9')) {
410 goto error;
411 }
412 }
413 else {
414 *end++ = *p;
415 /* Underscores are only allowed before digits. */
416 if (prev == '_' && !(*p >= '0' && *p <= '9')) {
417 goto error;
418 }
419 }
420 prev = *p;
421 }
422 /* Underscores are not allowed at the end. */
423 if (prev == '_') {
424 goto error;
425 }
426 /* No embedded NULs allowed. */
427 if (p != last) {
428 goto error;
429 }
430 *end = '\0';
431 result = innerfunc(dup, end - dup, arg);
432 PyMem_Free(dup);
433 return result;
434
435 error:
436 PyMem_Free(dup);
437 PyErr_Format(PyExc_ValueError,
Barry Warsawb2e57942017-09-14 18:13:16 -0700438 "could not convert string to %s: "
439 "%R", what, obj);
Brett Cannona721aba2016-09-09 14:57:09 -0700440 return NULL;
441}
442
Eric Smith68af50b2010-02-22 14:58:30 +0000443#ifdef PY_NO_SHORT_FLOAT_REPR
444
Eric Smithb2c7af82008-04-30 02:12:09 +0000445/* Given a string that may have a decimal point in the current
446 locale, change it back to a dot. Since the string cannot get
447 longer, no need for a maximum buffer size parameter. */
448Py_LOCAL_INLINE(void)
449change_decimal_from_locale_to_dot(char* buffer)
450{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000451 struct lconv *locale_data = localeconv();
452 const char *decimal_point = locale_data->decimal_point;
Eric Smithb2c7af82008-04-30 02:12:09 +0000453
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000454 if (decimal_point[0] != '.' || decimal_point[1] != 0) {
455 size_t decimal_point_len = strlen(decimal_point);
Eric Smithb2c7af82008-04-30 02:12:09 +0000456
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000457 if (*buffer == '+' || *buffer == '-')
458 buffer++;
459 while (Py_ISDIGIT(*buffer))
460 buffer++;
461 if (strncmp(buffer, decimal_point, decimal_point_len) == 0) {
462 *buffer = '.';
463 buffer++;
464 if (decimal_point_len > 1) {
465 /* buffer needs to get smaller */
466 size_t rest_len = strlen(buffer +
467 (decimal_point_len - 1));
468 memmove(buffer,
469 buffer + (decimal_point_len - 1),
470 rest_len);
471 buffer[rest_len] = 0;
472 }
473 }
474 }
Eric Smithb2c7af82008-04-30 02:12:09 +0000475}
476
Martin v. Löwis737ea822004-06-08 18:52:54 +0000477
Christian Heimesc3f30c42008-02-22 16:37:40 +0000478/* From the C99 standard, section 7.19.6:
479The exponent always contains at least two digits, and only as many more digits
480as necessary to represent the exponent.
481*/
482#define MIN_EXPONENT_DIGITS 2
483
Eric Smithb2c7af82008-04-30 02:12:09 +0000484/* Ensure that any exponent, if present, is at least MIN_EXPONENT_DIGITS
485 in length. */
486Py_LOCAL_INLINE(void)
Mark Dickinsonce95e562009-04-26 20:02:24 +0000487ensure_minimum_exponent_length(char* buffer, size_t buf_size)
Eric Smithb2c7af82008-04-30 02:12:09 +0000488{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000489 char *p = strpbrk(buffer, "eE");
490 if (p && (*(p + 1) == '-' || *(p + 1) == '+')) {
491 char *start = p + 2;
492 int exponent_digit_cnt = 0;
493 int leading_zero_cnt = 0;
494 int in_leading_zeros = 1;
495 int significant_digit_cnt;
Eric Smithb2c7af82008-04-30 02:12:09 +0000496
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000497 /* Skip over the exponent and the sign. */
498 p += 2;
Eric Smithb2c7af82008-04-30 02:12:09 +0000499
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000500 /* Find the end of the exponent, keeping track of leading
501 zeros. */
502 while (*p && Py_ISDIGIT(*p)) {
503 if (in_leading_zeros && *p == '0')
504 ++leading_zero_cnt;
505 if (*p != '0')
506 in_leading_zeros = 0;
507 ++p;
508 ++exponent_digit_cnt;
509 }
Eric Smithb2c7af82008-04-30 02:12:09 +0000510
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000511 significant_digit_cnt = exponent_digit_cnt - leading_zero_cnt;
512 if (exponent_digit_cnt == MIN_EXPONENT_DIGITS) {
513 /* If there are 2 exactly digits, we're done,
514 regardless of what they contain */
515 }
516 else if (exponent_digit_cnt > MIN_EXPONENT_DIGITS) {
517 int extra_zeros_cnt;
Eric Smithb2c7af82008-04-30 02:12:09 +0000518
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000519 /* There are more than 2 digits in the exponent. See
520 if we can delete some of the leading zeros */
521 if (significant_digit_cnt < MIN_EXPONENT_DIGITS)
522 significant_digit_cnt = MIN_EXPONENT_DIGITS;
523 extra_zeros_cnt = exponent_digit_cnt -
524 significant_digit_cnt;
Eric Smithb2c7af82008-04-30 02:12:09 +0000525
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000526 /* Delete extra_zeros_cnt worth of characters from the
527 front of the exponent */
528 assert(extra_zeros_cnt >= 0);
Eric Smithb2c7af82008-04-30 02:12:09 +0000529
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000530 /* Add one to significant_digit_cnt to copy the
531 trailing 0 byte, thus setting the length */
532 memmove(start,
533 start + extra_zeros_cnt,
534 significant_digit_cnt + 1);
535 }
536 else {
537 /* If there are fewer than 2 digits, add zeros
538 until there are 2, if there's enough room */
539 int zeros = MIN_EXPONENT_DIGITS - exponent_digit_cnt;
540 if (start + zeros + exponent_digit_cnt + 1
541 < buffer + buf_size) {
542 memmove(start + zeros, start,
543 exponent_digit_cnt + 1);
544 memset(start, '0', zeros);
545 }
546 }
547 }
Eric Smithb2c7af82008-04-30 02:12:09 +0000548}
549
Mark Dickinsond3ca5572009-04-29 18:47:07 +0000550/* Remove trailing zeros after the decimal point from a numeric string; also
551 remove the decimal point if all digits following it are zero. The numeric
552 string must end in '\0', and should not have any leading or trailing
553 whitespace. Assumes that the decimal point is '.'. */
Eric Smithb2c7af82008-04-30 02:12:09 +0000554Py_LOCAL_INLINE(void)
Mark Dickinsond3ca5572009-04-29 18:47:07 +0000555remove_trailing_zeros(char *buffer)
Eric Smithb2c7af82008-04-30 02:12:09 +0000556{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000557 char *old_fraction_end, *new_fraction_end, *end, *p;
Mark Dickinsond3ca5572009-04-29 18:47:07 +0000558
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000559 p = buffer;
560 if (*p == '-' || *p == '+')
561 /* Skip leading sign, if present */
562 ++p;
563 while (Py_ISDIGIT(*p))
564 ++p;
Mark Dickinsond3ca5572009-04-29 18:47:07 +0000565
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000566 /* if there's no decimal point there's nothing to do */
567 if (*p++ != '.')
568 return;
Mark Dickinsond3ca5572009-04-29 18:47:07 +0000569
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000570 /* scan any digits after the point */
571 while (Py_ISDIGIT(*p))
572 ++p;
573 old_fraction_end = p;
Mark Dickinsond3ca5572009-04-29 18:47:07 +0000574
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000575 /* scan up to ending '\0' */
576 while (*p != '\0')
577 p++;
578 /* +1 to make sure that we move the null byte as well */
579 end = p+1;
Mark Dickinsond3ca5572009-04-29 18:47:07 +0000580
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000581 /* scan back from fraction_end, looking for removable zeros */
582 p = old_fraction_end;
583 while (*(p-1) == '0')
584 --p;
585 /* and remove point if we've got that far */
586 if (*(p-1) == '.')
587 --p;
588 new_fraction_end = p;
Mark Dickinsond3ca5572009-04-29 18:47:07 +0000589
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000590 memmove(new_fraction_end, old_fraction_end, end-old_fraction_end);
Mark Dickinsond3ca5572009-04-29 18:47:07 +0000591}
592
593/* Ensure that buffer has a decimal point in it. The decimal point will not
594 be in the current locale, it will always be '.'. Don't add a decimal point
595 if an exponent is present. Also, convert to exponential notation where
596 adding a '.0' would produce too many significant digits (see issue 5864).
597
598 Returns a pointer to the fixed buffer, or NULL on failure.
599*/
600Py_LOCAL_INLINE(char *)
601ensure_decimal_point(char* buffer, size_t buf_size, int precision)
602{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000603 int digit_count, insert_count = 0, convert_to_exp = 0;
Serhiy Storchakae2f92de2017-11-11 13:06:26 +0200604 const char *chars_to_insert;
605 char *digits_start;
Eric Smithb2c7af82008-04-30 02:12:09 +0000606
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000607 /* search for the first non-digit character */
608 char *p = buffer;
609 if (*p == '-' || *p == '+')
610 /* Skip leading sign, if present. I think this could only
611 ever be '-', but it can't hurt to check for both. */
612 ++p;
613 digits_start = p;
614 while (*p && Py_ISDIGIT(*p))
615 ++p;
616 digit_count = Py_SAFE_DOWNCAST(p - digits_start, Py_ssize_t, int);
Eric Smithb2c7af82008-04-30 02:12:09 +0000617
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000618 if (*p == '.') {
619 if (Py_ISDIGIT(*(p+1))) {
620 /* Nothing to do, we already have a decimal
621 point and a digit after it */
622 }
623 else {
624 /* We have a decimal point, but no following
625 digit. Insert a zero after the decimal. */
626 /* can't ever get here via PyOS_double_to_string */
627 assert(precision == -1);
628 ++p;
629 chars_to_insert = "0";
630 insert_count = 1;
631 }
632 }
633 else if (!(*p == 'e' || *p == 'E')) {
634 /* Don't add ".0" if we have an exponent. */
635 if (digit_count == precision) {
636 /* issue 5864: don't add a trailing .0 in the case
637 where the '%g'-formatted result already has as many
638 significant digits as were requested. Switch to
639 exponential notation instead. */
640 convert_to_exp = 1;
641 /* no exponent, no point, and we shouldn't land here
642 for infs and nans, so we must be at the end of the
643 string. */
644 assert(*p == '\0');
645 }
646 else {
647 assert(precision == -1 || digit_count < precision);
648 chars_to_insert = ".0";
649 insert_count = 2;
650 }
651 }
652 if (insert_count) {
653 size_t buf_len = strlen(buffer);
654 if (buf_len + insert_count + 1 >= buf_size) {
655 /* If there is not enough room in the buffer
656 for the additional text, just skip it. It's
657 not worth generating an error over. */
658 }
659 else {
660 memmove(p + insert_count, p,
661 buffer + strlen(buffer) - p + 1);
662 memcpy(p, chars_to_insert, insert_count);
663 }
664 }
665 if (convert_to_exp) {
666 int written;
667 size_t buf_avail;
668 p = digits_start;
669 /* insert decimal point */
670 assert(digit_count >= 1);
671 memmove(p+2, p+1, digit_count); /* safe, but overwrites nul */
672 p[1] = '.';
673 p += digit_count+1;
674 assert(p <= buf_size+buffer);
675 buf_avail = buf_size+buffer-p;
676 if (buf_avail == 0)
677 return NULL;
678 /* Add exponent. It's okay to use lower case 'e': we only
679 arrive here as a result of using the empty format code or
680 repr/str builtins and those never want an upper case 'E' */
681 written = PyOS_snprintf(p, buf_avail, "e%+.02d", digit_count-1);
682 if (!(0 <= written &&
683 written < Py_SAFE_DOWNCAST(buf_avail, size_t, int)))
684 /* output truncated, or something else bad happened */
685 return NULL;
686 remove_trailing_zeros(buffer);
687 }
688 return buffer;
Eric Smithb2c7af82008-04-30 02:12:09 +0000689}
690
Christian Heimesc3f30c42008-02-22 16:37:40 +0000691/* see FORMATBUFLEN in unicodeobject.c */
692#define FLOAT_FORMATBUFLEN 120
693
Martin v. Löwis737ea822004-06-08 18:52:54 +0000694/**
Eric Smith68af50b2010-02-22 14:58:30 +0000695 * _PyOS_ascii_formatd:
Martin v. Löwis737ea822004-06-08 18:52:54 +0000696 * @buffer: A buffer to place the resulting string in
Christian Heimesb186d002008-03-18 15:15:01 +0000697 * @buf_size: The length of the buffer.
Martin v. Löwis737ea822004-06-08 18:52:54 +0000698 * @format: The printf()-style format to use for the
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000699 * code to use for converting.
Martin v. Löwis737ea822004-06-08 18:52:54 +0000700 * @d: The #gdouble to convert
Eric Smith68af50b2010-02-22 14:58:30 +0000701 * @precision: The precision to use when formatting.
Martin v. Löwis737ea822004-06-08 18:52:54 +0000702 *
703 * Converts a #gdouble to a string, using the '.' as
704 * decimal point. To format the number you pass in
705 * a printf()-style format string. Allowed conversion
Eric Smith0923d1d2009-04-16 20:16:10 +0000706 * specifiers are 'e', 'E', 'f', 'F', 'g', 'G', and 'Z'.
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000707 *
Christian Heimesb186d002008-03-18 15:15:01 +0000708 * 'Z' is the same as 'g', except it always has a decimal and
709 * at least one digit after the decimal.
Christian Heimesc3f30c42008-02-22 16:37:40 +0000710 *
Martin v. Löwis737ea822004-06-08 18:52:54 +0000711 * Return value: The pointer to the buffer with the converted string.
Mark Dickinsond3ca5572009-04-29 18:47:07 +0000712 * On failure returns NULL but does not set any Python exception.
Martin v. Löwis737ea822004-06-08 18:52:54 +0000713 **/
Eric Smith68af50b2010-02-22 14:58:30 +0000714static char *
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000715_PyOS_ascii_formatd(char *buffer,
716 size_t buf_size,
717 const char *format,
718 double d,
719 int precision)
Martin v. Löwis737ea822004-06-08 18:52:54 +0000720{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000721 char format_char;
722 size_t format_len = strlen(format);
Christian Heimesc3f30c42008-02-22 16:37:40 +0000723
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000724 /* Issue 2264: code 'Z' requires copying the format. 'Z' is 'g', but
725 also with at least one character past the decimal. */
726 char tmp_format[FLOAT_FORMATBUFLEN];
Martin v. Löwis737ea822004-06-08 18:52:54 +0000727
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000728 /* The last character in the format string must be the format char */
729 format_char = format[format_len - 1];
Martin v. Löwis737ea822004-06-08 18:52:54 +0000730
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000731 if (format[0] != '%')
732 return NULL;
Martin v. Löwis737ea822004-06-08 18:52:54 +0000733
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000734 /* I'm not sure why this test is here. It's ensuring that the format
735 string after the first character doesn't have a single quote, a
736 lowercase l, or a percent. This is the reverse of the commented-out
737 test about 10 lines ago. */
738 if (strpbrk(format + 1, "'l%"))
739 return NULL;
Martin v. Löwis737ea822004-06-08 18:52:54 +0000740
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000741 /* Also curious about this function is that it accepts format strings
742 like "%xg", which are invalid for floats. In general, the
743 interface to this function is not very good, but changing it is
744 difficult because it's a public API. */
Christian Heimesb186d002008-03-18 15:15:01 +0000745
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000746 if (!(format_char == 'e' || format_char == 'E' ||
747 format_char == 'f' || format_char == 'F' ||
748 format_char == 'g' || format_char == 'G' ||
749 format_char == 'Z'))
750 return NULL;
Martin v. Löwis737ea822004-06-08 18:52:54 +0000751
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000752 /* Map 'Z' format_char to 'g', by copying the format string and
753 replacing the final char with a 'g' */
754 if (format_char == 'Z') {
755 if (format_len + 1 >= sizeof(tmp_format)) {
756 /* The format won't fit in our copy. Error out. In
757 practice, this will never happen and will be
758 detected by returning NULL */
759 return NULL;
760 }
761 strcpy(tmp_format, format);
762 tmp_format[format_len - 1] = 'g';
763 format = tmp_format;
764 }
Martin v. Löwis737ea822004-06-08 18:52:54 +0000765
Christian Heimesb186d002008-03-18 15:15:01 +0000766
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000767 /* Have PyOS_snprintf do the hard work */
768 PyOS_snprintf(buffer, buf_size, format, d);
Martin v. Löwis737ea822004-06-08 18:52:54 +0000769
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000770 /* Do various fixups on the return string */
Martin v. Löwis737ea822004-06-08 18:52:54 +0000771
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000772 /* Get the current locale, and find the decimal point string.
773 Convert that string back to a dot. */
774 change_decimal_from_locale_to_dot(buffer);
Christian Heimesc3f30c42008-02-22 16:37:40 +0000775
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000776 /* If an exponent exists, ensure that the exponent is at least
777 MIN_EXPONENT_DIGITS digits, providing the buffer is large enough
778 for the extra zeros. Also, if there are more than
779 MIN_EXPONENT_DIGITS, remove as many zeros as possible until we get
780 back to MIN_EXPONENT_DIGITS */
781 ensure_minimum_exponent_length(buffer, buf_size);
Martin v. Löwis737ea822004-06-08 18:52:54 +0000782
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000783 /* If format_char is 'Z', make sure we have at least one character
784 after the decimal point (and make sure we have a decimal point);
785 also switch to exponential notation in some edge cases where the
786 extra character would produce more significant digits that we
787 really want. */
788 if (format_char == 'Z')
789 buffer = ensure_decimal_point(buffer, buf_size, precision);
Christian Heimesb186d002008-03-18 15:15:01 +0000790
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000791 return buffer;
Martin v. Löwis737ea822004-06-08 18:52:54 +0000792}
793
Eric Smith0923d1d2009-04-16 20:16:10 +0000794/* The fallback code to use if _Py_dg_dtoa is not available. */
795
Benjamin Petersone5024512018-09-12 12:06:42 -0700796char * PyOS_double_to_string(double val,
Eric Smith0923d1d2009-04-16 20:16:10 +0000797 char format_code,
798 int precision,
799 int flags,
800 int *type)
Martin v. Löwis737ea822004-06-08 18:52:54 +0000801{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000802 char format[32];
803 Py_ssize_t bufsize;
804 char *buf;
805 int t, exp;
806 int upper = 0;
Eric Smith0923d1d2009-04-16 20:16:10 +0000807
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000808 /* Validate format_code, and map upper and lower case */
809 switch (format_code) {
810 case 'e': /* exponent */
811 case 'f': /* fixed */
812 case 'g': /* general */
813 break;
814 case 'E':
815 upper = 1;
816 format_code = 'e';
817 break;
818 case 'F':
819 upper = 1;
820 format_code = 'f';
821 break;
822 case 'G':
823 upper = 1;
824 format_code = 'g';
825 break;
826 case 'r': /* repr format */
827 /* Supplied precision is unused, must be 0. */
828 if (precision != 0) {
829 PyErr_BadInternalCall();
830 return NULL;
831 }
832 /* The repr() precision (17 significant decimal digits) is the
833 minimal number that is guaranteed to have enough precision
834 so that if the number is read back in the exact same binary
835 value is recreated. This is true for IEEE floating point
836 by design, and also happens to work for all other modern
837 hardware. */
838 precision = 17;
839 format_code = 'g';
840 break;
841 default:
842 PyErr_BadInternalCall();
843 return NULL;
844 }
Eric Smith0923d1d2009-04-16 20:16:10 +0000845
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000846 /* Here's a quick-and-dirty calculation to figure out how big a buffer
847 we need. In general, for a finite float we need:
Mark Dickinsonf489caf2009-05-01 11:42:00 +0000848
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000849 1 byte for each digit of the decimal significand, and
Mark Dickinsonf489caf2009-05-01 11:42:00 +0000850
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000851 1 for a possible sign
852 1 for a possible decimal point
853 2 for a possible [eE][+-]
854 1 for each digit of the exponent; if we allow 19 digits
855 total then we're safe up to exponents of 2**63.
856 1 for the trailing nul byte
Mark Dickinsonf489caf2009-05-01 11:42:00 +0000857
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000858 This gives a total of 24 + the number of digits in the significand,
859 and the number of digits in the significand is:
Mark Dickinsonf489caf2009-05-01 11:42:00 +0000860
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000861 for 'g' format: at most precision, except possibly
862 when precision == 0, when it's 1.
863 for 'e' format: precision+1
864 for 'f' format: precision digits after the point, at least 1
865 before. To figure out how many digits appear before the point
866 we have to examine the size of the number. If fabs(val) < 1.0
867 then there will be only one digit before the point. If
868 fabs(val) >= 1.0, then there are at most
Mark Dickinsonf489caf2009-05-01 11:42:00 +0000869
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000870 1+floor(log10(ceiling(fabs(val))))
Mark Dickinsonf489caf2009-05-01 11:42:00 +0000871
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000872 digits before the point (where the 'ceiling' allows for the
873 possibility that the rounding rounds the integer part of val
874 up). A safe upper bound for the above quantity is
875 1+floor(exp/3), where exp is the unique integer such that 0.5
876 <= fabs(val)/2**exp < 1.0. This exp can be obtained from
877 frexp.
Mark Dickinsonf489caf2009-05-01 11:42:00 +0000878
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000879 So we allow room for precision+1 digits for all formats, plus an
880 extra floor(exp/3) digits for 'f' format.
Mark Dickinsonf489caf2009-05-01 11:42:00 +0000881
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000882 */
Mark Dickinsonf489caf2009-05-01 11:42:00 +0000883
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000884 if (Py_IS_NAN(val) || Py_IS_INFINITY(val))
885 /* 3 for 'inf'/'nan', 1 for sign, 1 for '\0' */
886 bufsize = 5;
887 else {
888 bufsize = 25 + precision;
889 if (format_code == 'f' && fabs(val) >= 1.0) {
890 frexp(val, &exp);
891 bufsize += exp/3;
892 }
893 }
Mark Dickinsonf489caf2009-05-01 11:42:00 +0000894
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000895 buf = PyMem_Malloc(bufsize);
896 if (buf == NULL) {
897 PyErr_NoMemory();
898 return NULL;
899 }
Mark Dickinsonf489caf2009-05-01 11:42:00 +0000900
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000901 /* Handle nan and inf. */
902 if (Py_IS_NAN(val)) {
903 strcpy(buf, "nan");
904 t = Py_DTST_NAN;
905 } else if (Py_IS_INFINITY(val)) {
906 if (copysign(1., val) == 1.)
907 strcpy(buf, "inf");
908 else
909 strcpy(buf, "-inf");
910 t = Py_DTST_INFINITE;
911 } else {
912 t = Py_DTST_FINITE;
913 if (flags & Py_DTSF_ADD_DOT_0)
914 format_code = 'Z';
Eric Smith0923d1d2009-04-16 20:16:10 +0000915
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000916 PyOS_snprintf(format, sizeof(format), "%%%s.%i%c",
917 (flags & Py_DTSF_ALT ? "#" : ""), precision,
918 format_code);
919 _PyOS_ascii_formatd(buf, bufsize, format, val, precision);
920 }
Eric Smith0923d1d2009-04-16 20:16:10 +0000921
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000922 /* Add sign when requested. It's convenient (esp. when formatting
923 complex numbers) to include a sign even for inf and nan. */
924 if (flags & Py_DTSF_SIGN && buf[0] != '-') {
925 size_t len = strlen(buf);
926 /* the bufsize calculations above should ensure that we've got
927 space to add a sign */
928 assert((size_t)bufsize >= len+2);
929 memmove(buf+1, buf, len+1);
930 buf[0] = '+';
931 }
932 if (upper) {
933 /* Convert to upper case. */
934 char *p1;
935 for (p1 = buf; *p1; p1++)
936 *p1 = Py_TOUPPER(*p1);
937 }
Eric Smith0923d1d2009-04-16 20:16:10 +0000938
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000939 if (type)
940 *type = t;
941 return buf;
Martin v. Löwis737ea822004-06-08 18:52:54 +0000942}
Eric Smith0923d1d2009-04-16 20:16:10 +0000943
944#else
945
946/* _Py_dg_dtoa is available. */
947
/* Lower-case and upper-case spellings are kept in two parallel tables, so
   that no locale-dependent upper-casing routine is needed.  The OFS_*
   constants index both tables. */
enum {
    OFS_INF = 0,
    OFS_NAN = 1,
    OFS_E = 2
};

/* The code that uses these tables relies on the length of each string, so
   don't change them. */
static const char * const lc_float_strings[] = {
    [OFS_INF] = "inf",
    [OFS_NAN] = "nan",
    [OFS_E] = "e",
};
static const char * const uc_float_strings[] = {
    [OFS_INF] = "INF",
    [OFS_NAN] = "NAN",
    [OFS_E] = "E",
};
965
966
967/* Convert a double d to a string, and return a PyMem_Malloc'd block of
968 memory contain the resulting string.
969
970 Arguments:
971 d is the double to be converted
Eric Smith63376222009-05-05 14:04:18 +0000972 format_code is one of 'e', 'f', 'g', 'r'. 'e', 'f' and 'g'
973 correspond to '%e', '%f' and '%g'; 'r' corresponds to repr.
Eric Smith0923d1d2009-04-16 20:16:10 +0000974 mode is one of '0', '2' or '3', and is completely determined by
Eric Smith63376222009-05-05 14:04:18 +0000975 format_code: 'e' and 'g' use mode 2; 'f' mode 3, 'r' mode 0.
Eric Smith0923d1d2009-04-16 20:16:10 +0000976 precision is the desired precision
977 always_add_sign is nonzero if a '+' sign should be included for positive
978 numbers
979 add_dot_0_if_integer is nonzero if integers in non-exponential form
Eric Smith63376222009-05-05 14:04:18 +0000980 should have ".0" added. Only applies to format codes 'r' and 'g'.
Eric Smith0923d1d2009-04-16 20:16:10 +0000981 use_alt_formatting is nonzero if alternative formatting should be
Eric Smith63376222009-05-05 14:04:18 +0000982 used. Only applies to format codes 'e', 'f' and 'g'. For code 'g',
983 at most one of use_alt_formatting and add_dot_0_if_integer should
984 be nonzero.
Eric Smith0923d1d2009-04-16 20:16:10 +0000985 type, if non-NULL, will be set to one of these constants to identify
986 the type of the 'd' argument:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000987 Py_DTST_FINITE
988 Py_DTST_INFINITE
989 Py_DTST_NAN
Eric Smith0923d1d2009-04-16 20:16:10 +0000990
991 Returns a PyMem_Malloc'd block of memory containing the resulting string,
992 or NULL on error. If NULL is returned, the Python error has been set.
993 */
994
995static char *
996format_float_short(double d, char format_code,
Victor Stinner7b251352013-06-24 23:37:40 +0200997 int mode, int precision,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000998 int always_add_sign, int add_dot_0_if_integer,
Serhiy Storchaka2d06e842015-12-25 19:53:18 +0200999 int use_alt_formatting, const char * const *float_strings,
1000 int *type)
Eric Smith0923d1d2009-04-16 20:16:10 +00001001{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001002 char *buf = NULL;
1003 char *p = NULL;
1004 Py_ssize_t bufsize = 0;
1005 char *digits, *digits_end;
1006 int decpt_as_int, sign, exp_len, exp = 0, use_exp = 0;
1007 Py_ssize_t decpt, digits_len, vdigits_start, vdigits_end;
1008 _Py_SET_53BIT_PRECISION_HEADER;
Eric Smith0923d1d2009-04-16 20:16:10 +00001009
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001010 /* _Py_dg_dtoa returns a digit string (no decimal point or exponent).
1011 Must be matched by a call to _Py_dg_freedtoa. */
1012 _Py_SET_53BIT_PRECISION_START;
1013 digits = _Py_dg_dtoa(d, mode, precision, &decpt_as_int, &sign,
1014 &digits_end);
1015 _Py_SET_53BIT_PRECISION_END;
Eric Smith0923d1d2009-04-16 20:16:10 +00001016
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001017 decpt = (Py_ssize_t)decpt_as_int;
1018 if (digits == NULL) {
1019 /* The only failure mode is no memory. */
1020 PyErr_NoMemory();
1021 goto exit;
1022 }
1023 assert(digits_end != NULL && digits_end >= digits);
1024 digits_len = digits_end - digits;
Eric Smith0923d1d2009-04-16 20:16:10 +00001025
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001026 if (digits_len && !Py_ISDIGIT(digits[0])) {
1027 /* Infinities and nans here; adapt Gay's output,
1028 so convert Infinity to inf and NaN to nan, and
1029 ignore sign of nan. Then return. */
Eric Smith0923d1d2009-04-16 20:16:10 +00001030
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001031 /* ignore the actual sign of a nan */
1032 if (digits[0] == 'n' || digits[0] == 'N')
1033 sign = 0;
Mark Dickinsonad476da2009-04-23 19:14:16 +00001034
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001035 /* We only need 5 bytes to hold the result "+inf\0" . */
1036 bufsize = 5; /* Used later in an assert. */
1037 buf = (char *)PyMem_Malloc(bufsize);
1038 if (buf == NULL) {
1039 PyErr_NoMemory();
1040 goto exit;
1041 }
1042 p = buf;
Eric Smith0923d1d2009-04-16 20:16:10 +00001043
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001044 if (sign == 1) {
1045 *p++ = '-';
1046 }
1047 else if (always_add_sign) {
1048 *p++ = '+';
1049 }
1050 if (digits[0] == 'i' || digits[0] == 'I') {
1051 strncpy(p, float_strings[OFS_INF], 3);
1052 p += 3;
Eric Smith0923d1d2009-04-16 20:16:10 +00001053
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001054 if (type)
1055 *type = Py_DTST_INFINITE;
1056 }
1057 else if (digits[0] == 'n' || digits[0] == 'N') {
1058 strncpy(p, float_strings[OFS_NAN], 3);
1059 p += 3;
Eric Smith0923d1d2009-04-16 20:16:10 +00001060
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001061 if (type)
1062 *type = Py_DTST_NAN;
1063 }
1064 else {
1065 /* shouldn't get here: Gay's code should always return
1066 something starting with a digit, an 'I', or 'N' */
Barry Warsawb2e57942017-09-14 18:13:16 -07001067 Py_UNREACHABLE();
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001068 }
1069 goto exit;
1070 }
Eric Smith0923d1d2009-04-16 20:16:10 +00001071
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001072 /* The result must be finite (not inf or nan). */
1073 if (type)
1074 *type = Py_DTST_FINITE;
Eric Smith0923d1d2009-04-16 20:16:10 +00001075
1076
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001077 /* We got digits back, format them. We may need to pad 'digits'
1078 either on the left or right (or both) with extra zeros, so in
1079 general the resulting string has the form
Eric Smith0923d1d2009-04-16 20:16:10 +00001080
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001081 [<sign>]<zeros><digits><zeros>[<exponent>]
Eric Smith0923d1d2009-04-16 20:16:10 +00001082
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001083 where either of the <zeros> pieces could be empty, and there's a
1084 decimal point that could appear either in <digits> or in the
1085 leading or trailing <zeros>.
Eric Smith0923d1d2009-04-16 20:16:10 +00001086
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001087 Imagine an infinite 'virtual' string vdigits, consisting of the
1088 string 'digits' (starting at index 0) padded on both the left and
1089 right with infinite strings of zeros. We want to output a slice
Eric Smith0923d1d2009-04-16 20:16:10 +00001090
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001091 vdigits[vdigits_start : vdigits_end]
Eric Smith0923d1d2009-04-16 20:16:10 +00001092
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001093 of this virtual string. Thus if vdigits_start < 0 then we'll end
1094 up producing some leading zeros; if vdigits_end > digits_len there
1095 will be trailing zeros in the output. The next section of code
1096 determines whether to use an exponent or not, figures out the
1097 position 'decpt' of the decimal point, and computes 'vdigits_start'
1098 and 'vdigits_end'. */
1099 vdigits_end = digits_len;
1100 switch (format_code) {
1101 case 'e':
1102 use_exp = 1;
1103 vdigits_end = precision;
1104 break;
1105 case 'f':
1106 vdigits_end = decpt + precision;
1107 break;
1108 case 'g':
1109 if (decpt <= -4 || decpt >
1110 (add_dot_0_if_integer ? precision-1 : precision))
1111 use_exp = 1;
1112 if (use_alt_formatting)
1113 vdigits_end = precision;
1114 break;
1115 case 'r':
1116 /* convert to exponential format at 1e16. We used to convert
1117 at 1e17, but that gives odd-looking results for some values
1118 when a 16-digit 'shortest' repr is padded with bogus zeros.
1119 For example, repr(2e16+8) would give 20000000000000010.0;
1120 the true value is 20000000000000008.0. */
1121 if (decpt <= -4 || decpt > 16)
1122 use_exp = 1;
1123 break;
1124 default:
1125 PyErr_BadInternalCall();
1126 goto exit;
1127 }
Eric Smith0923d1d2009-04-16 20:16:10 +00001128
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001129 /* if using an exponent, reset decimal point position to 1 and adjust
1130 exponent accordingly.*/
1131 if (use_exp) {
Victor Stinner7b251352013-06-24 23:37:40 +02001132 exp = (int)decpt - 1;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001133 decpt = 1;
1134 }
1135 /* ensure vdigits_start < decpt <= vdigits_end, or vdigits_start <
1136 decpt < vdigits_end if add_dot_0_if_integer and no exponent */
1137 vdigits_start = decpt <= 0 ? decpt-1 : 0;
1138 if (!use_exp && add_dot_0_if_integer)
1139 vdigits_end = vdigits_end > decpt ? vdigits_end : decpt + 1;
1140 else
1141 vdigits_end = vdigits_end > decpt ? vdigits_end : decpt;
Eric Smith0923d1d2009-04-16 20:16:10 +00001142
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001143 /* double check inequalities */
1144 assert(vdigits_start <= 0 &&
1145 0 <= digits_len &&
1146 digits_len <= vdigits_end);
1147 /* decimal point should be in (vdigits_start, vdigits_end] */
1148 assert(vdigits_start < decpt && decpt <= vdigits_end);
Eric Smith0923d1d2009-04-16 20:16:10 +00001149
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001150 /* Compute an upper bound how much memory we need. This might be a few
1151 chars too long, but no big deal. */
1152 bufsize =
1153 /* sign, decimal point and trailing 0 byte */
1154 3 +
Eric Smith0923d1d2009-04-16 20:16:10 +00001155
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001156 /* total digit count (including zero padding on both sides) */
1157 (vdigits_end - vdigits_start) +
Eric Smith0923d1d2009-04-16 20:16:10 +00001158
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001159 /* exponent "e+100", max 3 numerical digits */
1160 (use_exp ? 5 : 0);
Eric Smith0923d1d2009-04-16 20:16:10 +00001161
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001162 /* Now allocate the memory and initialize p to point to the start of
1163 it. */
1164 buf = (char *)PyMem_Malloc(bufsize);
1165 if (buf == NULL) {
1166 PyErr_NoMemory();
1167 goto exit;
1168 }
1169 p = buf;
Eric Smith0923d1d2009-04-16 20:16:10 +00001170
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001171 /* Add a negative sign if negative, and a plus sign if non-negative
1172 and always_add_sign is true. */
1173 if (sign == 1)
1174 *p++ = '-';
1175 else if (always_add_sign)
1176 *p++ = '+';
Eric Smith0923d1d2009-04-16 20:16:10 +00001177
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001178 /* note that exactly one of the three 'if' conditions is true,
1179 so we include exactly one decimal point */
1180 /* Zero padding on left of digit string */
1181 if (decpt <= 0) {
1182 memset(p, '0', decpt-vdigits_start);
1183 p += decpt - vdigits_start;
1184 *p++ = '.';
1185 memset(p, '0', 0-decpt);
1186 p += 0-decpt;
1187 }
1188 else {
1189 memset(p, '0', 0-vdigits_start);
1190 p += 0 - vdigits_start;
1191 }
Eric Smith0923d1d2009-04-16 20:16:10 +00001192
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001193 /* Digits, with included decimal point */
1194 if (0 < decpt && decpt <= digits_len) {
1195 strncpy(p, digits, decpt-0);
1196 p += decpt-0;
1197 *p++ = '.';
1198 strncpy(p, digits+decpt, digits_len-decpt);
1199 p += digits_len-decpt;
1200 }
1201 else {
1202 strncpy(p, digits, digits_len);
1203 p += digits_len;
1204 }
Eric Smith0923d1d2009-04-16 20:16:10 +00001205
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001206 /* And zeros on the right */
1207 if (digits_len < decpt) {
1208 memset(p, '0', decpt-digits_len);
1209 p += decpt-digits_len;
1210 *p++ = '.';
1211 memset(p, '0', vdigits_end-decpt);
1212 p += vdigits_end-decpt;
1213 }
1214 else {
1215 memset(p, '0', vdigits_end-digits_len);
1216 p += vdigits_end-digits_len;
1217 }
Eric Smith0923d1d2009-04-16 20:16:10 +00001218
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001219 /* Delete a trailing decimal pt unless using alternative formatting. */
1220 if (p[-1] == '.' && !use_alt_formatting)
1221 p--;
Eric Smith0923d1d2009-04-16 20:16:10 +00001222
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001223 /* Now that we've done zero padding, add an exponent if needed. */
1224 if (use_exp) {
1225 *p++ = float_strings[OFS_E][0];
1226 exp_len = sprintf(p, "%+.02d", exp);
1227 p += exp_len;
1228 }
Eric Smith0923d1d2009-04-16 20:16:10 +00001229 exit:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001230 if (buf) {
1231 *p = '\0';
1232 /* It's too late if this fails, as we've already stepped on
1233 memory that isn't ours. But it's an okay debugging test. */
1234 assert(p-buf < bufsize);
1235 }
1236 if (digits)
1237 _Py_dg_freedtoa(digits);
Eric Smith0923d1d2009-04-16 20:16:10 +00001238
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001239 return buf;
Eric Smith0923d1d2009-04-16 20:16:10 +00001240}
1241
1242
Benjamin Petersone5024512018-09-12 12:06:42 -07001243char * PyOS_double_to_string(double val,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001244 char format_code,
1245 int precision,
1246 int flags,
1247 int *type)
Eric Smith0923d1d2009-04-16 20:16:10 +00001248{
Serhiy Storchaka2d06e842015-12-25 19:53:18 +02001249 const char * const *float_strings = lc_float_strings;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001250 int mode;
Eric Smith0923d1d2009-04-16 20:16:10 +00001251
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001252 /* Validate format_code, and map upper and lower case. Compute the
1253 mode and make any adjustments as needed. */
1254 switch (format_code) {
1255 /* exponent */
1256 case 'E':
1257 float_strings = uc_float_strings;
1258 format_code = 'e';
1259 /* Fall through. */
1260 case 'e':
1261 mode = 2;
1262 precision++;
1263 break;
Eric Smith193125a2009-04-16 22:08:31 +00001264
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001265 /* fixed */
1266 case 'F':
1267 float_strings = uc_float_strings;
1268 format_code = 'f';
1269 /* Fall through. */
1270 case 'f':
1271 mode = 3;
1272 break;
Eric Smith193125a2009-04-16 22:08:31 +00001273
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001274 /* general */
1275 case 'G':
1276 float_strings = uc_float_strings;
1277 format_code = 'g';
1278 /* Fall through. */
1279 case 'g':
1280 mode = 2;
1281 /* precision 0 makes no sense for 'g' format; interpret as 1 */
1282 if (precision == 0)
1283 precision = 1;
1284 break;
Eric Smith193125a2009-04-16 22:08:31 +00001285
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001286 /* repr format */
1287 case 'r':
1288 mode = 0;
1289 /* Supplied precision is unused, must be 0. */
1290 if (precision != 0) {
1291 PyErr_BadInternalCall();
1292 return NULL;
1293 }
1294 break;
Eric Smith193125a2009-04-16 22:08:31 +00001295
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001296 default:
1297 PyErr_BadInternalCall();
1298 return NULL;
1299 }
Eric Smith0923d1d2009-04-16 20:16:10 +00001300
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001301 return format_float_short(val, format_code, mode, precision,
1302 flags & Py_DTSF_SIGN,
1303 flags & Py_DTSF_ADD_DOT_0,
1304 flags & Py_DTSF_ALT,
1305 float_strings, type);
Eric Smith0923d1d2009-04-16 20:16:10 +00001306}
1307#endif /* ifdef PY_NO_SHORT_FLOAT_REPR */