blob: 94dc4818c2f473d2e7a4ac8151fc63deb8efb317 [file] [log] [blame]
Martin v. Löwis737ea822004-06-08 18:52:54 +00001/* -*- Mode: C; c-file-style: "python" -*- */
2
3#include <Python.h>
4#include <locale.h>
5
/* Case-insensitive prefix match used for nan and inf detection.  The
   pattern t must already be lower-case; only s is case-folded.  Returns 1
   when every character of t has been matched at the start of s, and 0
   otherwise. */

static int
case_insensitive_match(const char *s, const char *t)
{
    for (; *t != '\0'; s++, t++) {
        if (Py_TOLOWER(*s) != *t) {
            /* Mismatch (this also covers s ending before t does). */
            return 0;
        }
    }
    return 1;
}
18
/* _Py_parse_inf_or_nan: Attempt to parse a string of the form "nan", "inf" or
   "infinity", with an optional leading sign of "+" or "-".  On success,
   return the NaN or Infinity as a double and set *endptr to point just beyond
   the successfully parsed portion of the string.  On failure, return -1.0 and
   set *endptr to point to the start of the string. */
24
Mark Dickinsone383e822012-04-29 15:31:56 +010025#ifndef PY_NO_SHORT_FLOAT_REPR
26
/* Variant built when the dtoa-based helpers are available: the special
   values are produced by _Py_dg_infinity() / _Py_dg_stdnan(), which take
   the sign as an argument. */
double
_Py_parse_inf_or_nan(const char *p, char **endptr)
{
    const char *cursor = p;
    int negative = (*cursor == '-');
    double value;

    /* Step over a single optional sign character. */
    if (negative || *cursor == '+') {
        cursor++;
    }

    if (case_insensitive_match(cursor, "inf")) {
        cursor += 3;
        /* Accept the long spelling "infinity" as well. */
        if (case_insensitive_match(cursor, "inity")) {
            cursor += 5;
        }
        value = _Py_dg_infinity(negative);
    }
    else if (case_insensitive_match(cursor, "nan")) {
        cursor += 3;
        value = _Py_dg_stdnan(negative);
    }
    else {
        /* Nothing recognised: report failure by rewinding to the start. */
        cursor = p;
        value = -1.0;
    }

    *endptr = (char *)cursor;
    return value;
}
59
60#else
61
Mark Dickinsonbd16edd2009-05-20 22:05:25 +000062double
63_Py_parse_inf_or_nan(const char *p, char **endptr)
64{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000065 double retval;
66 const char *s;
67 int negate = 0;
Mark Dickinsonbd16edd2009-05-20 22:05:25 +000068
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000069 s = p;
70 if (*s == '-') {
71 negate = 1;
72 s++;
73 }
74 else if (*s == '+') {
75 s++;
76 }
77 if (case_insensitive_match(s, "inf")) {
78 s += 3;
79 if (case_insensitive_match(s, "inity"))
80 s += 5;
81 retval = negate ? -Py_HUGE_VAL : Py_HUGE_VAL;
82 }
Mark Dickinsonbd16edd2009-05-20 22:05:25 +000083#ifdef Py_NAN
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000084 else if (case_insensitive_match(s, "nan")) {
85 s += 3;
86 retval = negate ? -Py_NAN : Py_NAN;
87 }
Mark Dickinsonbd16edd2009-05-20 22:05:25 +000088#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000089 else {
90 s = p;
91 retval = -1.0;
92 }
93 *endptr = (char *)s;
94 return retval;
Mark Dickinsonbd16edd2009-05-20 22:05:25 +000095}
96
Mark Dickinsone383e822012-04-29 15:31:56 +010097#endif
98
/**
 * _PyOS_ascii_strtod:
 * @nptr:    the string to convert to a numeric value.
 * @endptr:  if non-%NULL, it returns the character after
 *           the last character used in the conversion.
 *
 * Converts a string to a double value.
 * This function behaves like the standard strtod() function
 * does in the C locale. It does this without actually
 * changing the current locale, since that would not be
 * thread-safe.
 *
 * This function is typically used when reading configuration
 * files or other non-user input that should be locale independent.
 * To handle input from the user you should normally use the
 * locale-sensitive system strtod() function.
 *
 * If the correct value would cause overflow, plus or minus %HUGE_VAL
 * is returned (according to the sign of the value), and %ERANGE is
 * stored in %errno. If the correct value would cause underflow,
 * zero is returned and %ERANGE is stored in %errno.
 * If memory allocation fails, %ENOMEM is stored in %errno.
 *
 * This function resets %errno before calling strtod() so that
 * you can reliably detect overflow and underflow.
 *
 * Return value: the double value.
 **/
Eric Smith0923d1d2009-04-16 20:16:10 +0000127
128#ifndef PY_NO_SHORT_FLOAT_REPR
129
Eric Smith68af50b2010-02-22 14:58:30 +0000130static double
Mark Dickinson725bfd82009-05-03 20:33:40 +0000131_PyOS_ascii_strtod(const char *nptr, char **endptr)
Eric Smith0923d1d2009-04-16 20:16:10 +0000132{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000133 double result;
134 _Py_SET_53BIT_PRECISION_HEADER;
Eric Smith0923d1d2009-04-16 20:16:10 +0000135
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000136 assert(nptr != NULL);
137 /* Set errno to zero, so that we can distinguish zero results
138 and underflows */
139 errno = 0;
Eric Smith0923d1d2009-04-16 20:16:10 +0000140
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000141 _Py_SET_53BIT_PRECISION_START;
142 result = _Py_dg_strtod(nptr, endptr);
143 _Py_SET_53BIT_PRECISION_END;
Eric Smith0923d1d2009-04-16 20:16:10 +0000144
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000145 if (*endptr == nptr)
146 /* string might represent an inf or nan */
147 result = _Py_parse_inf_or_nan(nptr, endptr);
Mark Dickinsonbd16edd2009-05-20 22:05:25 +0000148
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000149 return result;
Eric Smith0923d1d2009-04-16 20:16:10 +0000150
151}
152
153#else
154
/*
   Use system strtod;  since strtod is locale aware, we may
   have to first fix the decimal separator.

   Note that unlike _Py_dg_strtod, the system strtod may not always give
   correctly rounded results.

   On success returns the parsed value and sets *endptr just past the
   consumed characters of nptr.  On invalid input returns -1.0, sets
   *endptr to nptr and errno to EINVAL.  If the temporary copy cannot be
   allocated, sets *endptr to nptr and errno to ENOMEM.
*/

static double
_PyOS_ascii_strtod(const char *nptr, char **endptr)
{
    char *fail_pos;
    double val;
    struct lconv *locale_data;
    const char *decimal_point;
    size_t decimal_point_len;
    const char *p, *decimal_point_pos;
    const char *end = NULL; /* Silence gcc */
    const char *digits_pos = NULL;
    int negate = 0;

    assert(nptr != NULL);

    fail_pos = NULL;

    locale_data = localeconv();
    decimal_point = locale_data->decimal_point;
    decimal_point_len = strlen(decimal_point);

    assert(decimal_point_len != 0);

    decimal_point_pos = NULL;

    /* Parse infinities and nans */
    val = _Py_parse_inf_or_nan(nptr, endptr);
    if (*endptr != nptr)
        return val;

    /* Set errno to zero, so that we can distinguish zero results
       and underflows */
    errno = 0;

    /* We process the optional sign manually, then pass the remainder to
       the system strtod.  This ensures that the result of an underflow
       has the correct sign. (bug #1725) */
    p = nptr;
    /* Process leading sign, if present */
    if (*p == '-') {
        negate = 1;
        p++;
    }
    else if (*p == '+') {
        p++;
    }

    /* Some platform strtods accept hex floats; Python shouldn't (at the
       moment), so we check explicitly for strings starting with '0x'. */
    if (*p == '0' && (*(p+1) == 'x' || *(p+1) == 'X'))
        goto invalid_string;

    /* Check that what's left begins with a digit or decimal point */
    if (!Py_ISDIGIT(*p) && *p != '.')
        goto invalid_string;

    digits_pos = p;
    if (decimal_point[0] != '.' ||
        decimal_point[1] != 0)
    {
        /* Look for a '.' in the input; if present, it'll need to be
           swapped for the current locale's decimal point before we
           call strtod.  On the other hand, if we find the current
           locale's decimal point then the input is invalid. */
        while (Py_ISDIGIT(*p))
            p++;

        if (*p == '.')
        {
            decimal_point_pos = p++;

            /* locate end of number */
            while (Py_ISDIGIT(*p))
                p++;

            if (*p == 'e' || *p == 'E')
                p++;
            if (*p == '+' || *p == '-')
                p++;
            while (Py_ISDIGIT(*p))
                p++;
            end = p;
        }
        else if (strncmp(p, decimal_point, decimal_point_len) == 0)
            /* Python bug #1417699 */
            goto invalid_string;
        /* For the other cases, we need not convert the decimal
           point */
    }

    if (decimal_point_pos) {
        char *copy, *c;
        /* Create a copy of the input, with the '.' converted to the
           locale-specific decimal point */
        copy = (char *)PyMem_MALLOC(end - digits_pos +
                                    1 + decimal_point_len);
        if (copy == NULL) {
            *endptr = (char *)nptr;
            errno = ENOMEM;
            return val;
        }

        c = copy;
        memcpy(c, digits_pos, decimal_point_pos - digits_pos);
        c += decimal_point_pos - digits_pos;
        memcpy(c, decimal_point, decimal_point_len);
        c += decimal_point_len;
        memcpy(c, decimal_point_pos + 1,
               end - (decimal_point_pos + 1));
        c += end - (decimal_point_pos + 1);
        *c = 0;

        val = strtod(copy, &fail_pos);

        if (fail_pos)
        {
            /* Translate the stop position from `copy` back into the
               caller's string.  fail_pos points into `copy` while
               decimal_point_pos points into the input, so the two must
               be compared as offsets from their respective starts, never
               as raw pointers (they belong to different objects). */
            size_t consumed = (size_t)(fail_pos - copy);
            size_t int_part_len = (size_t)(decimal_point_pos - digits_pos);

            if (consumed > int_part_len)
                /* strtod went past the decimal point: compensate for the
                   locale separator being longer than the single '.' */
                fail_pos = (char *)digits_pos + consumed -
                    (decimal_point_len - 1);
            else
                fail_pos = (char *)digits_pos + consumed;
        }

        PyMem_FREE(copy);

    }
    else {
        val = strtod(digits_pos, &fail_pos);
    }

    if (fail_pos == digits_pos)
        goto invalid_string;

    if (negate && fail_pos != nptr)
        val = -val;
    *endptr = fail_pos;

    return val;

  invalid_string:
    *endptr = (char*)nptr;
    errno = EINVAL;
    return -1.0;
}
309
Eric Smith0923d1d2009-04-16 20:16:10 +0000310#endif
311
Eric Smith68af50b2010-02-22 14:58:30 +0000312/* PyOS_string_to_double converts a null-terminated byte string s (interpreted
313 as a string of ASCII characters) to a float. The string should not have
314 leading or trailing whitespace. The conversion is independent of the
315 current locale.
Mark Dickinson725bfd82009-05-03 20:33:40 +0000316
317 If endptr is NULL, try to convert the whole string. Raise ValueError and
318 return -1.0 if the string is not a valid representation of a floating-point
319 number.
320
321 If endptr is non-NULL, try to convert as much of the string as possible.
322 If no initial segment of the string is the valid representation of a
323 floating-point number then *endptr is set to point to the beginning of the
324 string, -1.0 is returned and again ValueError is raised.
325
326 On overflow (e.g., when trying to convert '1e500' on an IEEE 754 machine),
327 if overflow_exception is NULL then +-Py_HUGE_VAL is returned, and no Python
Serhiy Storchaka56a6d852014-12-01 18:28:43 +0200328 exception is raised. Otherwise, overflow_exception should point to
Mark Dickinson725bfd82009-05-03 20:33:40 +0000329 a Python exception, this exception will be raised, -1.0 will be returned,
330 and *endptr will point just past the end of the converted value.
331
332 If any other failure occurs (for example lack of memory), -1.0 is returned
333 and the appropriate Python exception will have been set.
334*/
335
336double
337PyOS_string_to_double(const char *s,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000338 char **endptr,
339 PyObject *overflow_exception)
Mark Dickinson725bfd82009-05-03 20:33:40 +0000340{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000341 double x, result=-1.0;
342 char *fail_pos;
Mark Dickinson725bfd82009-05-03 20:33:40 +0000343
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000344 errno = 0;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000345 x = _PyOS_ascii_strtod(s, &fail_pos);
Mark Dickinson725bfd82009-05-03 20:33:40 +0000346
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000347 if (errno == ENOMEM) {
348 PyErr_NoMemory();
349 fail_pos = (char *)s;
350 }
351 else if (!endptr && (fail_pos == s || *fail_pos != '\0'))
352 PyErr_Format(PyExc_ValueError,
353 "could not convert string to float: "
Pedro Lacerda4fa75042019-05-17 19:32:44 -0300354 "'%.200s'", s);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000355 else if (fail_pos == s)
356 PyErr_Format(PyExc_ValueError,
357 "could not convert string to float: "
Pedro Lacerda4fa75042019-05-17 19:32:44 -0300358 "'%.200s'", s);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000359 else if (errno == ERANGE && fabs(x) >= 1.0 && overflow_exception)
360 PyErr_Format(overflow_exception,
361 "value too large to convert to float: "
Pedro Lacerda4fa75042019-05-17 19:32:44 -0300362 "'%.200s'", s);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000363 else
364 result = x;
Mark Dickinson725bfd82009-05-03 20:33:40 +0000365
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000366 if (endptr != NULL)
367 *endptr = fail_pos;
368 return result;
Mark Dickinson725bfd82009-05-03 20:33:40 +0000369}
Eric Smith0923d1d2009-04-16 20:16:10 +0000370
Brett Cannona721aba2016-09-09 14:57:09 -0700371/* Remove underscores that follow the underscore placement rule from
372 the string and then call the `innerfunc` function on the result.
373 It should return a new object or NULL on exception.
374
375 `what` is used for the error message emitted when underscores are detected
376 that don't follow the rule. `arg` is an opaque pointer passed to the inner
377 function.
378
379 This is used to implement underscore-agnostic conversion for floats
380 and complex numbers.
381*/
382PyObject *
383_Py_string_to_number_with_underscores(
384 const char *s, Py_ssize_t orig_len, const char *what, PyObject *obj, void *arg,
385 PyObject *(*innerfunc)(const char *, Py_ssize_t, void *))
386{
387 char prev;
388 const char *p, *last;
389 char *dup, *end;
390 PyObject *result;
391
INADA Naoki16dfca42018-07-14 12:06:43 +0900392 assert(s[orig_len] == '\0');
393
Brett Cannona721aba2016-09-09 14:57:09 -0700394 if (strchr(s, '_') == NULL) {
395 return innerfunc(s, orig_len, arg);
396 }
397
398 dup = PyMem_Malloc(orig_len + 1);
Zackery Spytz4c49da02018-12-07 03:11:30 -0700399 if (dup == NULL) {
400 return PyErr_NoMemory();
401 }
Brett Cannona721aba2016-09-09 14:57:09 -0700402 end = dup;
403 prev = '\0';
404 last = s + orig_len;
405 for (p = s; *p; p++) {
406 if (*p == '_') {
407 /* Underscores are only allowed after digits. */
408 if (!(prev >= '0' && prev <= '9')) {
409 goto error;
410 }
411 }
412 else {
413 *end++ = *p;
414 /* Underscores are only allowed before digits. */
415 if (prev == '_' && !(*p >= '0' && *p <= '9')) {
416 goto error;
417 }
418 }
419 prev = *p;
420 }
421 /* Underscores are not allowed at the end. */
422 if (prev == '_') {
423 goto error;
424 }
425 /* No embedded NULs allowed. */
426 if (p != last) {
427 goto error;
428 }
429 *end = '\0';
430 result = innerfunc(dup, end - dup, arg);
431 PyMem_Free(dup);
432 return result;
433
434 error:
435 PyMem_Free(dup);
436 PyErr_Format(PyExc_ValueError,
Barry Warsawb2e57942017-09-14 18:13:16 -0700437 "could not convert string to %s: "
438 "%R", what, obj);
Brett Cannona721aba2016-09-09 14:57:09 -0700439 return NULL;
440}
441
Eric Smith68af50b2010-02-22 14:58:30 +0000442#ifdef PY_NO_SHORT_FLOAT_REPR
443
Eric Smithb2c7af82008-04-30 02:12:09 +0000444/* Given a string that may have a decimal point in the current
445 locale, change it back to a dot. Since the string cannot get
446 longer, no need for a maximum buffer size parameter. */
447Py_LOCAL_INLINE(void)
448change_decimal_from_locale_to_dot(char* buffer)
449{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000450 struct lconv *locale_data = localeconv();
451 const char *decimal_point = locale_data->decimal_point;
Eric Smithb2c7af82008-04-30 02:12:09 +0000452
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000453 if (decimal_point[0] != '.' || decimal_point[1] != 0) {
454 size_t decimal_point_len = strlen(decimal_point);
Eric Smithb2c7af82008-04-30 02:12:09 +0000455
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000456 if (*buffer == '+' || *buffer == '-')
457 buffer++;
458 while (Py_ISDIGIT(*buffer))
459 buffer++;
460 if (strncmp(buffer, decimal_point, decimal_point_len) == 0) {
461 *buffer = '.';
462 buffer++;
463 if (decimal_point_len > 1) {
464 /* buffer needs to get smaller */
465 size_t rest_len = strlen(buffer +
466 (decimal_point_len - 1));
467 memmove(buffer,
468 buffer + (decimal_point_len - 1),
469 rest_len);
470 buffer[rest_len] = 0;
471 }
472 }
473 }
Eric Smithb2c7af82008-04-30 02:12:09 +0000474}
475
Martin v. Löwis737ea822004-06-08 18:52:54 +0000476
Christian Heimesc3f30c42008-02-22 16:37:40 +0000477/* From the C99 standard, section 7.19.6:
478The exponent always contains at least two digits, and only as many more digits
479as necessary to represent the exponent.
480*/
481#define MIN_EXPONENT_DIGITS 2
482
Eric Smithb2c7af82008-04-30 02:12:09 +0000483/* Ensure that any exponent, if present, is at least MIN_EXPONENT_DIGITS
484 in length. */
485Py_LOCAL_INLINE(void)
Mark Dickinsonce95e562009-04-26 20:02:24 +0000486ensure_minimum_exponent_length(char* buffer, size_t buf_size)
Eric Smithb2c7af82008-04-30 02:12:09 +0000487{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000488 char *p = strpbrk(buffer, "eE");
489 if (p && (*(p + 1) == '-' || *(p + 1) == '+')) {
490 char *start = p + 2;
491 int exponent_digit_cnt = 0;
492 int leading_zero_cnt = 0;
493 int in_leading_zeros = 1;
494 int significant_digit_cnt;
Eric Smithb2c7af82008-04-30 02:12:09 +0000495
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000496 /* Skip over the exponent and the sign. */
497 p += 2;
Eric Smithb2c7af82008-04-30 02:12:09 +0000498
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000499 /* Find the end of the exponent, keeping track of leading
500 zeros. */
501 while (*p && Py_ISDIGIT(*p)) {
502 if (in_leading_zeros && *p == '0')
503 ++leading_zero_cnt;
504 if (*p != '0')
505 in_leading_zeros = 0;
506 ++p;
507 ++exponent_digit_cnt;
508 }
Eric Smithb2c7af82008-04-30 02:12:09 +0000509
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000510 significant_digit_cnt = exponent_digit_cnt - leading_zero_cnt;
511 if (exponent_digit_cnt == MIN_EXPONENT_DIGITS) {
512 /* If there are 2 exactly digits, we're done,
513 regardless of what they contain */
514 }
515 else if (exponent_digit_cnt > MIN_EXPONENT_DIGITS) {
516 int extra_zeros_cnt;
Eric Smithb2c7af82008-04-30 02:12:09 +0000517
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000518 /* There are more than 2 digits in the exponent. See
519 if we can delete some of the leading zeros */
520 if (significant_digit_cnt < MIN_EXPONENT_DIGITS)
521 significant_digit_cnt = MIN_EXPONENT_DIGITS;
522 extra_zeros_cnt = exponent_digit_cnt -
523 significant_digit_cnt;
Eric Smithb2c7af82008-04-30 02:12:09 +0000524
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000525 /* Delete extra_zeros_cnt worth of characters from the
526 front of the exponent */
527 assert(extra_zeros_cnt >= 0);
Eric Smithb2c7af82008-04-30 02:12:09 +0000528
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000529 /* Add one to significant_digit_cnt to copy the
530 trailing 0 byte, thus setting the length */
531 memmove(start,
532 start + extra_zeros_cnt,
533 significant_digit_cnt + 1);
534 }
535 else {
536 /* If there are fewer than 2 digits, add zeros
537 until there are 2, if there's enough room */
538 int zeros = MIN_EXPONENT_DIGITS - exponent_digit_cnt;
539 if (start + zeros + exponent_digit_cnt + 1
540 < buffer + buf_size) {
541 memmove(start + zeros, start,
542 exponent_digit_cnt + 1);
543 memset(start, '0', zeros);
544 }
545 }
546 }
Eric Smithb2c7af82008-04-30 02:12:09 +0000547}
548
Mark Dickinsond3ca5572009-04-29 18:47:07 +0000549/* Remove trailing zeros after the decimal point from a numeric string; also
550 remove the decimal point if all digits following it are zero. The numeric
551 string must end in '\0', and should not have any leading or trailing
552 whitespace. Assumes that the decimal point is '.'. */
Eric Smithb2c7af82008-04-30 02:12:09 +0000553Py_LOCAL_INLINE(void)
Mark Dickinsond3ca5572009-04-29 18:47:07 +0000554remove_trailing_zeros(char *buffer)
Eric Smithb2c7af82008-04-30 02:12:09 +0000555{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000556 char *old_fraction_end, *new_fraction_end, *end, *p;
Mark Dickinsond3ca5572009-04-29 18:47:07 +0000557
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000558 p = buffer;
559 if (*p == '-' || *p == '+')
560 /* Skip leading sign, if present */
561 ++p;
562 while (Py_ISDIGIT(*p))
563 ++p;
Mark Dickinsond3ca5572009-04-29 18:47:07 +0000564
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000565 /* if there's no decimal point there's nothing to do */
566 if (*p++ != '.')
567 return;
Mark Dickinsond3ca5572009-04-29 18:47:07 +0000568
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000569 /* scan any digits after the point */
570 while (Py_ISDIGIT(*p))
571 ++p;
572 old_fraction_end = p;
Mark Dickinsond3ca5572009-04-29 18:47:07 +0000573
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000574 /* scan up to ending '\0' */
575 while (*p != '\0')
576 p++;
577 /* +1 to make sure that we move the null byte as well */
578 end = p+1;
Mark Dickinsond3ca5572009-04-29 18:47:07 +0000579
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000580 /* scan back from fraction_end, looking for removable zeros */
581 p = old_fraction_end;
582 while (*(p-1) == '0')
583 --p;
584 /* and remove point if we've got that far */
585 if (*(p-1) == '.')
586 --p;
587 new_fraction_end = p;
Mark Dickinsond3ca5572009-04-29 18:47:07 +0000588
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000589 memmove(new_fraction_end, old_fraction_end, end-old_fraction_end);
Mark Dickinsond3ca5572009-04-29 18:47:07 +0000590}
591
592/* Ensure that buffer has a decimal point in it. The decimal point will not
593 be in the current locale, it will always be '.'. Don't add a decimal point
594 if an exponent is present. Also, convert to exponential notation where
595 adding a '.0' would produce too many significant digits (see issue 5864).
596
597 Returns a pointer to the fixed buffer, or NULL on failure.
598*/
599Py_LOCAL_INLINE(char *)
600ensure_decimal_point(char* buffer, size_t buf_size, int precision)
601{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000602 int digit_count, insert_count = 0, convert_to_exp = 0;
Serhiy Storchakae2f92de2017-11-11 13:06:26 +0200603 const char *chars_to_insert;
604 char *digits_start;
Eric Smithb2c7af82008-04-30 02:12:09 +0000605
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000606 /* search for the first non-digit character */
607 char *p = buffer;
608 if (*p == '-' || *p == '+')
609 /* Skip leading sign, if present. I think this could only
610 ever be '-', but it can't hurt to check for both. */
611 ++p;
612 digits_start = p;
613 while (*p && Py_ISDIGIT(*p))
614 ++p;
615 digit_count = Py_SAFE_DOWNCAST(p - digits_start, Py_ssize_t, int);
Eric Smithb2c7af82008-04-30 02:12:09 +0000616
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000617 if (*p == '.') {
618 if (Py_ISDIGIT(*(p+1))) {
619 /* Nothing to do, we already have a decimal
620 point and a digit after it */
621 }
622 else {
623 /* We have a decimal point, but no following
624 digit. Insert a zero after the decimal. */
625 /* can't ever get here via PyOS_double_to_string */
626 assert(precision == -1);
627 ++p;
628 chars_to_insert = "0";
629 insert_count = 1;
630 }
631 }
632 else if (!(*p == 'e' || *p == 'E')) {
633 /* Don't add ".0" if we have an exponent. */
634 if (digit_count == precision) {
635 /* issue 5864: don't add a trailing .0 in the case
636 where the '%g'-formatted result already has as many
637 significant digits as were requested. Switch to
638 exponential notation instead. */
639 convert_to_exp = 1;
640 /* no exponent, no point, and we shouldn't land here
641 for infs and nans, so we must be at the end of the
642 string. */
643 assert(*p == '\0');
644 }
645 else {
646 assert(precision == -1 || digit_count < precision);
647 chars_to_insert = ".0";
648 insert_count = 2;
649 }
650 }
651 if (insert_count) {
652 size_t buf_len = strlen(buffer);
653 if (buf_len + insert_count + 1 >= buf_size) {
654 /* If there is not enough room in the buffer
655 for the additional text, just skip it. It's
656 not worth generating an error over. */
657 }
658 else {
659 memmove(p + insert_count, p,
660 buffer + strlen(buffer) - p + 1);
661 memcpy(p, chars_to_insert, insert_count);
662 }
663 }
664 if (convert_to_exp) {
665 int written;
666 size_t buf_avail;
667 p = digits_start;
668 /* insert decimal point */
669 assert(digit_count >= 1);
670 memmove(p+2, p+1, digit_count); /* safe, but overwrites nul */
671 p[1] = '.';
672 p += digit_count+1;
673 assert(p <= buf_size+buffer);
674 buf_avail = buf_size+buffer-p;
675 if (buf_avail == 0)
676 return NULL;
677 /* Add exponent. It's okay to use lower case 'e': we only
678 arrive here as a result of using the empty format code or
679 repr/str builtins and those never want an upper case 'E' */
680 written = PyOS_snprintf(p, buf_avail, "e%+.02d", digit_count-1);
681 if (!(0 <= written &&
682 written < Py_SAFE_DOWNCAST(buf_avail, size_t, int)))
683 /* output truncated, or something else bad happened */
684 return NULL;
685 remove_trailing_zeros(buffer);
686 }
687 return buffer;
Eric Smithb2c7af82008-04-30 02:12:09 +0000688}
689
Christian Heimesc3f30c42008-02-22 16:37:40 +0000690/* see FORMATBUFLEN in unicodeobject.c */
691#define FLOAT_FORMATBUFLEN 120
692
Martin v. Löwis737ea822004-06-08 18:52:54 +0000693/**
Eric Smith68af50b2010-02-22 14:58:30 +0000694 * _PyOS_ascii_formatd:
Martin v. Löwis737ea822004-06-08 18:52:54 +0000695 * @buffer: A buffer to place the resulting string in
Christian Heimesb186d002008-03-18 15:15:01 +0000696 * @buf_size: The length of the buffer.
Martin v. Löwis737ea822004-06-08 18:52:54 +0000697 * @format: The printf()-style format to use for the
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000698 * code to use for converting.
Martin v. Löwis737ea822004-06-08 18:52:54 +0000699 * @d: The #gdouble to convert
Eric Smith68af50b2010-02-22 14:58:30 +0000700 * @precision: The precision to use when formatting.
Martin v. Löwis737ea822004-06-08 18:52:54 +0000701 *
702 * Converts a #gdouble to a string, using the '.' as
703 * decimal point. To format the number you pass in
704 * a printf()-style format string. Allowed conversion
Eric Smith0923d1d2009-04-16 20:16:10 +0000705 * specifiers are 'e', 'E', 'f', 'F', 'g', 'G', and 'Z'.
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000706 *
Christian Heimesb186d002008-03-18 15:15:01 +0000707 * 'Z' is the same as 'g', except it always has a decimal and
708 * at least one digit after the decimal.
Christian Heimesc3f30c42008-02-22 16:37:40 +0000709 *
Martin v. Löwis737ea822004-06-08 18:52:54 +0000710 * Return value: The pointer to the buffer with the converted string.
Mark Dickinsond3ca5572009-04-29 18:47:07 +0000711 * On failure returns NULL but does not set any Python exception.
Martin v. Löwis737ea822004-06-08 18:52:54 +0000712 **/
Eric Smith68af50b2010-02-22 14:58:30 +0000713static char *
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000714_PyOS_ascii_formatd(char *buffer,
715 size_t buf_size,
716 const char *format,
717 double d,
718 int precision)
Martin v. Löwis737ea822004-06-08 18:52:54 +0000719{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000720 char format_char;
721 size_t format_len = strlen(format);
Christian Heimesc3f30c42008-02-22 16:37:40 +0000722
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000723 /* Issue 2264: code 'Z' requires copying the format. 'Z' is 'g', but
724 also with at least one character past the decimal. */
725 char tmp_format[FLOAT_FORMATBUFLEN];
Martin v. Löwis737ea822004-06-08 18:52:54 +0000726
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000727 /* The last character in the format string must be the format char */
728 format_char = format[format_len - 1];
Martin v. Löwis737ea822004-06-08 18:52:54 +0000729
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000730 if (format[0] != '%')
731 return NULL;
Martin v. Löwis737ea822004-06-08 18:52:54 +0000732
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000733 /* I'm not sure why this test is here. It's ensuring that the format
734 string after the first character doesn't have a single quote, a
735 lowercase l, or a percent. This is the reverse of the commented-out
736 test about 10 lines ago. */
737 if (strpbrk(format + 1, "'l%"))
738 return NULL;
Martin v. Löwis737ea822004-06-08 18:52:54 +0000739
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000740 /* Also curious about this function is that it accepts format strings
741 like "%xg", which are invalid for floats. In general, the
742 interface to this function is not very good, but changing it is
743 difficult because it's a public API. */
Christian Heimesb186d002008-03-18 15:15:01 +0000744
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000745 if (!(format_char == 'e' || format_char == 'E' ||
746 format_char == 'f' || format_char == 'F' ||
747 format_char == 'g' || format_char == 'G' ||
748 format_char == 'Z'))
749 return NULL;
Martin v. Löwis737ea822004-06-08 18:52:54 +0000750
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000751 /* Map 'Z' format_char to 'g', by copying the format string and
752 replacing the final char with a 'g' */
753 if (format_char == 'Z') {
754 if (format_len + 1 >= sizeof(tmp_format)) {
755 /* The format won't fit in our copy. Error out. In
756 practice, this will never happen and will be
757 detected by returning NULL */
758 return NULL;
759 }
760 strcpy(tmp_format, format);
761 tmp_format[format_len - 1] = 'g';
762 format = tmp_format;
763 }
Martin v. Löwis737ea822004-06-08 18:52:54 +0000764
Christian Heimesb186d002008-03-18 15:15:01 +0000765
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000766 /* Have PyOS_snprintf do the hard work */
767 PyOS_snprintf(buffer, buf_size, format, d);
Martin v. Löwis737ea822004-06-08 18:52:54 +0000768
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000769 /* Do various fixups on the return string */
Martin v. Löwis737ea822004-06-08 18:52:54 +0000770
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000771 /* Get the current locale, and find the decimal point string.
772 Convert that string back to a dot. */
773 change_decimal_from_locale_to_dot(buffer);
Christian Heimesc3f30c42008-02-22 16:37:40 +0000774
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000775 /* If an exponent exists, ensure that the exponent is at least
776 MIN_EXPONENT_DIGITS digits, providing the buffer is large enough
777 for the extra zeros. Also, if there are more than
778 MIN_EXPONENT_DIGITS, remove as many zeros as possible until we get
779 back to MIN_EXPONENT_DIGITS */
780 ensure_minimum_exponent_length(buffer, buf_size);
Martin v. Löwis737ea822004-06-08 18:52:54 +0000781
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000782 /* If format_char is 'Z', make sure we have at least one character
783 after the decimal point (and make sure we have a decimal point);
784 also switch to exponential notation in some edge cases where the
785 extra character would produce more significant digits that we
786 really want. */
787 if (format_char == 'Z')
788 buffer = ensure_decimal_point(buffer, buf_size, precision);
Christian Heimesb186d002008-03-18 15:15:01 +0000789
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000790 return buffer;
Martin v. Löwis737ea822004-06-08 18:52:54 +0000791}
792
Eric Smith0923d1d2009-04-16 20:16:10 +0000793/* The fallback code to use if _Py_dg_dtoa is not available. */
794
Benjamin Petersone5024512018-09-12 12:06:42 -0700795char * PyOS_double_to_string(double val,
Eric Smith0923d1d2009-04-16 20:16:10 +0000796 char format_code,
797 int precision,
798 int flags,
799 int *type)
Martin v. Löwis737ea822004-06-08 18:52:54 +0000800{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000801 char format[32];
802 Py_ssize_t bufsize;
803 char *buf;
804 int t, exp;
805 int upper = 0;
Eric Smith0923d1d2009-04-16 20:16:10 +0000806
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000807 /* Validate format_code, and map upper and lower case */
808 switch (format_code) {
809 case 'e': /* exponent */
810 case 'f': /* fixed */
811 case 'g': /* general */
812 break;
813 case 'E':
814 upper = 1;
815 format_code = 'e';
816 break;
817 case 'F':
818 upper = 1;
819 format_code = 'f';
820 break;
821 case 'G':
822 upper = 1;
823 format_code = 'g';
824 break;
825 case 'r': /* repr format */
826 /* Supplied precision is unused, must be 0. */
827 if (precision != 0) {
828 PyErr_BadInternalCall();
829 return NULL;
830 }
831 /* The repr() precision (17 significant decimal digits) is the
832 minimal number that is guaranteed to have enough precision
833 so that if the number is read back in the exact same binary
834 value is recreated. This is true for IEEE floating point
835 by design, and also happens to work for all other modern
836 hardware. */
837 precision = 17;
838 format_code = 'g';
839 break;
840 default:
841 PyErr_BadInternalCall();
842 return NULL;
843 }
Eric Smith0923d1d2009-04-16 20:16:10 +0000844
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000845 /* Here's a quick-and-dirty calculation to figure out how big a buffer
846 we need. In general, for a finite float we need:
Mark Dickinsonf489caf2009-05-01 11:42:00 +0000847
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000848 1 byte for each digit of the decimal significand, and
Mark Dickinsonf489caf2009-05-01 11:42:00 +0000849
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000850 1 for a possible sign
851 1 for a possible decimal point
852 2 for a possible [eE][+-]
853 1 for each digit of the exponent; if we allow 19 digits
854 total then we're safe up to exponents of 2**63.
855 1 for the trailing nul byte
Mark Dickinsonf489caf2009-05-01 11:42:00 +0000856
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000857 This gives a total of 24 + the number of digits in the significand,
858 and the number of digits in the significand is:
Mark Dickinsonf489caf2009-05-01 11:42:00 +0000859
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000860 for 'g' format: at most precision, except possibly
861 when precision == 0, when it's 1.
862 for 'e' format: precision+1
863 for 'f' format: precision digits after the point, at least 1
864 before. To figure out how many digits appear before the point
865 we have to examine the size of the number. If fabs(val) < 1.0
866 then there will be only one digit before the point. If
867 fabs(val) >= 1.0, then there are at most
Mark Dickinsonf489caf2009-05-01 11:42:00 +0000868
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000869 1+floor(log10(ceiling(fabs(val))))
Mark Dickinsonf489caf2009-05-01 11:42:00 +0000870
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000871 digits before the point (where the 'ceiling' allows for the
872 possibility that the rounding rounds the integer part of val
873 up). A safe upper bound for the above quantity is
874 1+floor(exp/3), where exp is the unique integer such that 0.5
875 <= fabs(val)/2**exp < 1.0. This exp can be obtained from
876 frexp.
Mark Dickinsonf489caf2009-05-01 11:42:00 +0000877
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000878 So we allow room for precision+1 digits for all formats, plus an
879 extra floor(exp/3) digits for 'f' format.
Mark Dickinsonf489caf2009-05-01 11:42:00 +0000880
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000881 */
Mark Dickinsonf489caf2009-05-01 11:42:00 +0000882
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000883 if (Py_IS_NAN(val) || Py_IS_INFINITY(val))
884 /* 3 for 'inf'/'nan', 1 for sign, 1 for '\0' */
885 bufsize = 5;
886 else {
887 bufsize = 25 + precision;
888 if (format_code == 'f' && fabs(val) >= 1.0) {
889 frexp(val, &exp);
890 bufsize += exp/3;
891 }
892 }
Mark Dickinsonf489caf2009-05-01 11:42:00 +0000893
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000894 buf = PyMem_Malloc(bufsize);
895 if (buf == NULL) {
896 PyErr_NoMemory();
897 return NULL;
898 }
Mark Dickinsonf489caf2009-05-01 11:42:00 +0000899
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000900 /* Handle nan and inf. */
901 if (Py_IS_NAN(val)) {
902 strcpy(buf, "nan");
903 t = Py_DTST_NAN;
904 } else if (Py_IS_INFINITY(val)) {
905 if (copysign(1., val) == 1.)
906 strcpy(buf, "inf");
907 else
908 strcpy(buf, "-inf");
909 t = Py_DTST_INFINITE;
910 } else {
911 t = Py_DTST_FINITE;
912 if (flags & Py_DTSF_ADD_DOT_0)
913 format_code = 'Z';
Eric Smith0923d1d2009-04-16 20:16:10 +0000914
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000915 PyOS_snprintf(format, sizeof(format), "%%%s.%i%c",
916 (flags & Py_DTSF_ALT ? "#" : ""), precision,
917 format_code);
918 _PyOS_ascii_formatd(buf, bufsize, format, val, precision);
919 }
Eric Smith0923d1d2009-04-16 20:16:10 +0000920
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000921 /* Add sign when requested. It's convenient (esp. when formatting
922 complex numbers) to include a sign even for inf and nan. */
923 if (flags & Py_DTSF_SIGN && buf[0] != '-') {
924 size_t len = strlen(buf);
925 /* the bufsize calculations above should ensure that we've got
926 space to add a sign */
927 assert((size_t)bufsize >= len+2);
928 memmove(buf+1, buf, len+1);
929 buf[0] = '+';
930 }
931 if (upper) {
932 /* Convert to upper case. */
933 char *p1;
934 for (p1 = buf; *p1; p1++)
935 *p1 = Py_TOUPPER(*p1);
936 }
Eric Smith0923d1d2009-04-16 20:16:10 +0000937
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000938 if (type)
939 *type = t;
940 return buf;
Martin v. Löwis737ea822004-06-08 18:52:54 +0000941}
Eric Smith0923d1d2009-04-16 20:16:10 +0000942
943#else
944
945/* _Py_dg_dtoa is available. */
946
/* The spellings of "inf", "nan" and the exponent marker are looked up in a
   lower-case or an upper-case table, so that producing upper-case output
   never needs a locale-independent case conversion routine. */
#define OFS_INF 0
#define OFS_NAN 1
#define OFS_E 2

/* The code that consumes these tables relies on the length of each entry,
   so don't change them. */
static const char * const lc_float_strings[] = {
    [OFS_INF] = "inf",
    [OFS_NAN] = "nan",
    [OFS_E] = "e",
};
static const char * const uc_float_strings[] = {
    [OFS_INF] = "INF",
    [OFS_NAN] = "NAN",
    [OFS_E] = "E",
};
964
965
966/* Convert a double d to a string, and return a PyMem_Malloc'd block of
967 memory contain the resulting string.
968
969 Arguments:
970 d is the double to be converted
Eric Smith63376222009-05-05 14:04:18 +0000971 format_code is one of 'e', 'f', 'g', 'r'. 'e', 'f' and 'g'
972 correspond to '%e', '%f' and '%g'; 'r' corresponds to repr.
Eric Smith0923d1d2009-04-16 20:16:10 +0000973 mode is one of '0', '2' or '3', and is completely determined by
Eric Smith63376222009-05-05 14:04:18 +0000974 format_code: 'e' and 'g' use mode 2; 'f' mode 3, 'r' mode 0.
Eric Smith0923d1d2009-04-16 20:16:10 +0000975 precision is the desired precision
976 always_add_sign is nonzero if a '+' sign should be included for positive
977 numbers
978 add_dot_0_if_integer is nonzero if integers in non-exponential form
Eric Smith63376222009-05-05 14:04:18 +0000979 should have ".0" added. Only applies to format codes 'r' and 'g'.
Eric Smith0923d1d2009-04-16 20:16:10 +0000980 use_alt_formatting is nonzero if alternative formatting should be
Eric Smith63376222009-05-05 14:04:18 +0000981 used. Only applies to format codes 'e', 'f' and 'g'. For code 'g',
982 at most one of use_alt_formatting and add_dot_0_if_integer should
983 be nonzero.
Eric Smith0923d1d2009-04-16 20:16:10 +0000984 type, if non-NULL, will be set to one of these constants to identify
985 the type of the 'd' argument:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000986 Py_DTST_FINITE
987 Py_DTST_INFINITE
988 Py_DTST_NAN
Eric Smith0923d1d2009-04-16 20:16:10 +0000989
990 Returns a PyMem_Malloc'd block of memory containing the resulting string,
991 or NULL on error. If NULL is returned, the Python error has been set.
992 */
993
994static char *
995format_float_short(double d, char format_code,
Victor Stinner7b251352013-06-24 23:37:40 +0200996 int mode, int precision,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000997 int always_add_sign, int add_dot_0_if_integer,
Serhiy Storchaka2d06e842015-12-25 19:53:18 +0200998 int use_alt_formatting, const char * const *float_strings,
999 int *type)
Eric Smith0923d1d2009-04-16 20:16:10 +00001000{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001001 char *buf = NULL;
1002 char *p = NULL;
1003 Py_ssize_t bufsize = 0;
1004 char *digits, *digits_end;
1005 int decpt_as_int, sign, exp_len, exp = 0, use_exp = 0;
1006 Py_ssize_t decpt, digits_len, vdigits_start, vdigits_end;
1007 _Py_SET_53BIT_PRECISION_HEADER;
Eric Smith0923d1d2009-04-16 20:16:10 +00001008
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001009 /* _Py_dg_dtoa returns a digit string (no decimal point or exponent).
1010 Must be matched by a call to _Py_dg_freedtoa. */
1011 _Py_SET_53BIT_PRECISION_START;
1012 digits = _Py_dg_dtoa(d, mode, precision, &decpt_as_int, &sign,
1013 &digits_end);
1014 _Py_SET_53BIT_PRECISION_END;
Eric Smith0923d1d2009-04-16 20:16:10 +00001015
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001016 decpt = (Py_ssize_t)decpt_as_int;
1017 if (digits == NULL) {
1018 /* The only failure mode is no memory. */
1019 PyErr_NoMemory();
1020 goto exit;
1021 }
1022 assert(digits_end != NULL && digits_end >= digits);
1023 digits_len = digits_end - digits;
Eric Smith0923d1d2009-04-16 20:16:10 +00001024
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001025 if (digits_len && !Py_ISDIGIT(digits[0])) {
1026 /* Infinities and nans here; adapt Gay's output,
1027 so convert Infinity to inf and NaN to nan, and
1028 ignore sign of nan. Then return. */
Eric Smith0923d1d2009-04-16 20:16:10 +00001029
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001030 /* ignore the actual sign of a nan */
1031 if (digits[0] == 'n' || digits[0] == 'N')
1032 sign = 0;
Mark Dickinsonad476da2009-04-23 19:14:16 +00001033
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001034 /* We only need 5 bytes to hold the result "+inf\0" . */
1035 bufsize = 5; /* Used later in an assert. */
1036 buf = (char *)PyMem_Malloc(bufsize);
1037 if (buf == NULL) {
1038 PyErr_NoMemory();
1039 goto exit;
1040 }
1041 p = buf;
Eric Smith0923d1d2009-04-16 20:16:10 +00001042
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001043 if (sign == 1) {
1044 *p++ = '-';
1045 }
1046 else if (always_add_sign) {
1047 *p++ = '+';
1048 }
1049 if (digits[0] == 'i' || digits[0] == 'I') {
1050 strncpy(p, float_strings[OFS_INF], 3);
1051 p += 3;
Eric Smith0923d1d2009-04-16 20:16:10 +00001052
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001053 if (type)
1054 *type = Py_DTST_INFINITE;
1055 }
1056 else if (digits[0] == 'n' || digits[0] == 'N') {
1057 strncpy(p, float_strings[OFS_NAN], 3);
1058 p += 3;
Eric Smith0923d1d2009-04-16 20:16:10 +00001059
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001060 if (type)
1061 *type = Py_DTST_NAN;
1062 }
1063 else {
1064 /* shouldn't get here: Gay's code should always return
1065 something starting with a digit, an 'I', or 'N' */
Barry Warsawb2e57942017-09-14 18:13:16 -07001066 Py_UNREACHABLE();
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001067 }
1068 goto exit;
1069 }
Eric Smith0923d1d2009-04-16 20:16:10 +00001070
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001071 /* The result must be finite (not inf or nan). */
1072 if (type)
1073 *type = Py_DTST_FINITE;
Eric Smith0923d1d2009-04-16 20:16:10 +00001074
1075
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001076 /* We got digits back, format them. We may need to pad 'digits'
1077 either on the left or right (or both) with extra zeros, so in
1078 general the resulting string has the form
Eric Smith0923d1d2009-04-16 20:16:10 +00001079
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001080 [<sign>]<zeros><digits><zeros>[<exponent>]
Eric Smith0923d1d2009-04-16 20:16:10 +00001081
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001082 where either of the <zeros> pieces could be empty, and there's a
1083 decimal point that could appear either in <digits> or in the
1084 leading or trailing <zeros>.
Eric Smith0923d1d2009-04-16 20:16:10 +00001085
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001086 Imagine an infinite 'virtual' string vdigits, consisting of the
1087 string 'digits' (starting at index 0) padded on both the left and
1088 right with infinite strings of zeros. We want to output a slice
Eric Smith0923d1d2009-04-16 20:16:10 +00001089
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001090 vdigits[vdigits_start : vdigits_end]
Eric Smith0923d1d2009-04-16 20:16:10 +00001091
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001092 of this virtual string. Thus if vdigits_start < 0 then we'll end
1093 up producing some leading zeros; if vdigits_end > digits_len there
1094 will be trailing zeros in the output. The next section of code
1095 determines whether to use an exponent or not, figures out the
1096 position 'decpt' of the decimal point, and computes 'vdigits_start'
1097 and 'vdigits_end'. */
1098 vdigits_end = digits_len;
1099 switch (format_code) {
1100 case 'e':
1101 use_exp = 1;
1102 vdigits_end = precision;
1103 break;
1104 case 'f':
1105 vdigits_end = decpt + precision;
1106 break;
1107 case 'g':
1108 if (decpt <= -4 || decpt >
1109 (add_dot_0_if_integer ? precision-1 : precision))
1110 use_exp = 1;
1111 if (use_alt_formatting)
1112 vdigits_end = precision;
1113 break;
1114 case 'r':
1115 /* convert to exponential format at 1e16. We used to convert
1116 at 1e17, but that gives odd-looking results for some values
1117 when a 16-digit 'shortest' repr is padded with bogus zeros.
1118 For example, repr(2e16+8) would give 20000000000000010.0;
1119 the true value is 20000000000000008.0. */
1120 if (decpt <= -4 || decpt > 16)
1121 use_exp = 1;
1122 break;
1123 default:
1124 PyErr_BadInternalCall();
1125 goto exit;
1126 }
Eric Smith0923d1d2009-04-16 20:16:10 +00001127
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001128 /* if using an exponent, reset decimal point position to 1 and adjust
1129 exponent accordingly.*/
1130 if (use_exp) {
Victor Stinner7b251352013-06-24 23:37:40 +02001131 exp = (int)decpt - 1;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001132 decpt = 1;
1133 }
1134 /* ensure vdigits_start < decpt <= vdigits_end, or vdigits_start <
1135 decpt < vdigits_end if add_dot_0_if_integer and no exponent */
1136 vdigits_start = decpt <= 0 ? decpt-1 : 0;
1137 if (!use_exp && add_dot_0_if_integer)
1138 vdigits_end = vdigits_end > decpt ? vdigits_end : decpt + 1;
1139 else
1140 vdigits_end = vdigits_end > decpt ? vdigits_end : decpt;
Eric Smith0923d1d2009-04-16 20:16:10 +00001141
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001142 /* double check inequalities */
1143 assert(vdigits_start <= 0 &&
1144 0 <= digits_len &&
1145 digits_len <= vdigits_end);
1146 /* decimal point should be in (vdigits_start, vdigits_end] */
1147 assert(vdigits_start < decpt && decpt <= vdigits_end);
Eric Smith0923d1d2009-04-16 20:16:10 +00001148
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001149 /* Compute an upper bound how much memory we need. This might be a few
1150 chars too long, but no big deal. */
1151 bufsize =
1152 /* sign, decimal point and trailing 0 byte */
1153 3 +
Eric Smith0923d1d2009-04-16 20:16:10 +00001154
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001155 /* total digit count (including zero padding on both sides) */
1156 (vdigits_end - vdigits_start) +
Eric Smith0923d1d2009-04-16 20:16:10 +00001157
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001158 /* exponent "e+100", max 3 numerical digits */
1159 (use_exp ? 5 : 0);
Eric Smith0923d1d2009-04-16 20:16:10 +00001160
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001161 /* Now allocate the memory and initialize p to point to the start of
1162 it. */
1163 buf = (char *)PyMem_Malloc(bufsize);
1164 if (buf == NULL) {
1165 PyErr_NoMemory();
1166 goto exit;
1167 }
1168 p = buf;
Eric Smith0923d1d2009-04-16 20:16:10 +00001169
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001170 /* Add a negative sign if negative, and a plus sign if non-negative
1171 and always_add_sign is true. */
1172 if (sign == 1)
1173 *p++ = '-';
1174 else if (always_add_sign)
1175 *p++ = '+';
Eric Smith0923d1d2009-04-16 20:16:10 +00001176
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001177 /* note that exactly one of the three 'if' conditions is true,
1178 so we include exactly one decimal point */
1179 /* Zero padding on left of digit string */
1180 if (decpt <= 0) {
1181 memset(p, '0', decpt-vdigits_start);
1182 p += decpt - vdigits_start;
1183 *p++ = '.';
1184 memset(p, '0', 0-decpt);
1185 p += 0-decpt;
1186 }
1187 else {
1188 memset(p, '0', 0-vdigits_start);
1189 p += 0 - vdigits_start;
1190 }
Eric Smith0923d1d2009-04-16 20:16:10 +00001191
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001192 /* Digits, with included decimal point */
1193 if (0 < decpt && decpt <= digits_len) {
1194 strncpy(p, digits, decpt-0);
1195 p += decpt-0;
1196 *p++ = '.';
1197 strncpy(p, digits+decpt, digits_len-decpt);
1198 p += digits_len-decpt;
1199 }
1200 else {
1201 strncpy(p, digits, digits_len);
1202 p += digits_len;
1203 }
Eric Smith0923d1d2009-04-16 20:16:10 +00001204
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001205 /* And zeros on the right */
1206 if (digits_len < decpt) {
1207 memset(p, '0', decpt-digits_len);
1208 p += decpt-digits_len;
1209 *p++ = '.';
1210 memset(p, '0', vdigits_end-decpt);
1211 p += vdigits_end-decpt;
1212 }
1213 else {
1214 memset(p, '0', vdigits_end-digits_len);
1215 p += vdigits_end-digits_len;
1216 }
Eric Smith0923d1d2009-04-16 20:16:10 +00001217
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001218 /* Delete a trailing decimal pt unless using alternative formatting. */
1219 if (p[-1] == '.' && !use_alt_formatting)
1220 p--;
Eric Smith0923d1d2009-04-16 20:16:10 +00001221
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001222 /* Now that we've done zero padding, add an exponent if needed. */
1223 if (use_exp) {
1224 *p++ = float_strings[OFS_E][0];
1225 exp_len = sprintf(p, "%+.02d", exp);
1226 p += exp_len;
1227 }
Eric Smith0923d1d2009-04-16 20:16:10 +00001228 exit:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001229 if (buf) {
1230 *p = '\0';
1231 /* It's too late if this fails, as we've already stepped on
1232 memory that isn't ours. But it's an okay debugging test. */
1233 assert(p-buf < bufsize);
1234 }
1235 if (digits)
1236 _Py_dg_freedtoa(digits);
Eric Smith0923d1d2009-04-16 20:16:10 +00001237
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001238 return buf;
Eric Smith0923d1d2009-04-16 20:16:10 +00001239}
1240
1241
Benjamin Petersone5024512018-09-12 12:06:42 -07001242char * PyOS_double_to_string(double val,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001243 char format_code,
1244 int precision,
1245 int flags,
1246 int *type)
Eric Smith0923d1d2009-04-16 20:16:10 +00001247{
Serhiy Storchaka2d06e842015-12-25 19:53:18 +02001248 const char * const *float_strings = lc_float_strings;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001249 int mode;
Eric Smith0923d1d2009-04-16 20:16:10 +00001250
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001251 /* Validate format_code, and map upper and lower case. Compute the
1252 mode and make any adjustments as needed. */
1253 switch (format_code) {
1254 /* exponent */
1255 case 'E':
1256 float_strings = uc_float_strings;
1257 format_code = 'e';
1258 /* Fall through. */
1259 case 'e':
1260 mode = 2;
1261 precision++;
1262 break;
Eric Smith193125a2009-04-16 22:08:31 +00001263
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001264 /* fixed */
1265 case 'F':
1266 float_strings = uc_float_strings;
1267 format_code = 'f';
1268 /* Fall through. */
1269 case 'f':
1270 mode = 3;
1271 break;
Eric Smith193125a2009-04-16 22:08:31 +00001272
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001273 /* general */
1274 case 'G':
1275 float_strings = uc_float_strings;
1276 format_code = 'g';
1277 /* Fall through. */
1278 case 'g':
1279 mode = 2;
1280 /* precision 0 makes no sense for 'g' format; interpret as 1 */
1281 if (precision == 0)
1282 precision = 1;
1283 break;
Eric Smith193125a2009-04-16 22:08:31 +00001284
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001285 /* repr format */
1286 case 'r':
1287 mode = 0;
1288 /* Supplied precision is unused, must be 0. */
1289 if (precision != 0) {
1290 PyErr_BadInternalCall();
1291 return NULL;
1292 }
1293 break;
Eric Smith193125a2009-04-16 22:08:31 +00001294
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001295 default:
1296 PyErr_BadInternalCall();
1297 return NULL;
1298 }
Eric Smith0923d1d2009-04-16 20:16:10 +00001299
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001300 return format_float_short(val, format_code, mode, precision,
1301 flags & Py_DTSF_SIGN,
1302 flags & Py_DTSF_ADD_DOT_0,
1303 flags & Py_DTSF_ALT,
1304 float_strings, type);
Eric Smith0923d1d2009-04-16 20:16:10 +00001305}
1306#endif /* ifdef PY_NO_SHORT_FLOAT_REPR */