blob: 461e8dcb5e0c4a879202234e748eecef9c2c2337 [file] [log] [blame]
Martin v. Löwis737ea822004-06-08 18:52:54 +00001/* -*- Mode: C; c-file-style: "python" -*- */
2
3#include <Python.h>
4#include <locale.h>
5
/* Case-insensitive prefix match used for nan and inf detection.  The pattern
   t must already be lower-case; only the characters of s are lower-cased
   before comparing.  Returns 1 if every character of t was matched at the
   start of s, 0 otherwise. */

static int
case_insensitive_match(const char *s, const char *t)
{
    for (; *t != '\0'; s++, t++) {
        if (Py_TOLOWER(*s) != *t) {
            /* Mismatch before the pattern was exhausted. */
            return 0;
        }
    }
    return 1;
}
18
Mark Dickinson3b38df22009-10-26 14:36:29 +000019/* _Py_parse_inf_or_nan: Attempt to parse a string of the form "nan", "inf" or
20 "infinity", with an optional leading sign of "+" or "-". On success,
21 return the NaN or Infinity as a double and set *endptr to point just beyond
22 the successfully parsed portion of the string. On failure, return -1.0 and
23 set *endptr to point to the start of the string. */
24
Mark Dickinsone383e822012-04-29 15:31:56 +010025#ifndef PY_NO_SHORT_FLOAT_REPR
26
double
_Py_parse_inf_or_nan(const char *p, char **endptr)
{
    const char *cursor = p;
    int is_negative = (*cursor == '-');
    double value;

    /* Step over an optional leading sign. */
    if (is_negative || *cursor == '+') {
        cursor++;
    }

    if (case_insensitive_match(cursor, "inf")) {
        cursor += 3;
        /* Accept the long spelling "infinity" as well. */
        if (case_insensitive_match(cursor, "inity")) {
            cursor += 5;
        }
        value = _Py_dg_infinity(is_negative);
    }
    else if (case_insensitive_match(cursor, "nan")) {
        cursor += 3;
        value = _Py_dg_stdnan(is_negative);
    }
    else {
        /* Nothing recognised: report failure by leaving the end pointer at
           the start of the input. */
        cursor = p;
        value = -1.0;
    }

    *endptr = (char *)cursor;
    return value;
}
59
60#else
61
Mark Dickinsonbd16edd2009-05-20 22:05:25 +000062double
63_Py_parse_inf_or_nan(const char *p, char **endptr)
64{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000065 double retval;
66 const char *s;
67 int negate = 0;
Mark Dickinsonbd16edd2009-05-20 22:05:25 +000068
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000069 s = p;
70 if (*s == '-') {
71 negate = 1;
72 s++;
73 }
74 else if (*s == '+') {
75 s++;
76 }
77 if (case_insensitive_match(s, "inf")) {
78 s += 3;
79 if (case_insensitive_match(s, "inity"))
80 s += 5;
81 retval = negate ? -Py_HUGE_VAL : Py_HUGE_VAL;
82 }
Mark Dickinsonbd16edd2009-05-20 22:05:25 +000083#ifdef Py_NAN
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000084 else if (case_insensitive_match(s, "nan")) {
85 s += 3;
86 retval = negate ? -Py_NAN : Py_NAN;
87 }
Mark Dickinsonbd16edd2009-05-20 22:05:25 +000088#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000089 else {
90 s = p;
91 retval = -1.0;
92 }
93 *endptr = (char *)s;
94 return retval;
Mark Dickinsonbd16edd2009-05-20 22:05:25 +000095}
96
Mark Dickinsone383e822012-04-29 15:31:56 +010097#endif
98
Martin v. Löwis737ea822004-06-08 18:52:54 +000099/**
Eric Smith68af50b2010-02-22 14:58:30 +0000100 * _PyOS_ascii_strtod:
Martin v. Löwis737ea822004-06-08 18:52:54 +0000101 * @nptr: the string to convert to a numeric value.
102 * @endptr: if non-%NULL, it returns the character after
103 * the last character used in the conversion.
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000104 *
 * Converts a string to a double value.
106 * This function behaves like the standard strtod() function
107 * does in the C locale. It does this without actually
108 * changing the current locale, since that would not be
109 * thread-safe.
110 *
111 * This function is typically used when reading configuration
112 * files or other non-user input that should be locale independent.
113 * To handle input from the user you should normally use the
114 * locale-sensitive system strtod() function.
115 *
116 * If the correct value would cause overflow, plus or minus %HUGE_VAL
117 * is returned (according to the sign of the value), and %ERANGE is
118 * stored in %errno. If the correct value would cause underflow,
119 * zero is returned and %ERANGE is stored in %errno.
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000120 * If memory allocation fails, %ENOMEM is stored in %errno.
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000121 *
Martin v. Löwis737ea822004-06-08 18:52:54 +0000122 * This function resets %errno before calling strtod() so that
123 * you can reliably detect overflow and underflow.
124 *
 * Return value: the converted double value.
126 **/
Eric Smith0923d1d2009-04-16 20:16:10 +0000127
128#ifndef PY_NO_SHORT_FLOAT_REPR
129
Eric Smith68af50b2010-02-22 14:58:30 +0000130static double
Mark Dickinson725bfd82009-05-03 20:33:40 +0000131_PyOS_ascii_strtod(const char *nptr, char **endptr)
Eric Smith0923d1d2009-04-16 20:16:10 +0000132{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000133 double result;
134 _Py_SET_53BIT_PRECISION_HEADER;
Eric Smith0923d1d2009-04-16 20:16:10 +0000135
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000136 assert(nptr != NULL);
137 /* Set errno to zero, so that we can distinguish zero results
138 and underflows */
139 errno = 0;
Eric Smith0923d1d2009-04-16 20:16:10 +0000140
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000141 _Py_SET_53BIT_PRECISION_START;
142 result = _Py_dg_strtod(nptr, endptr);
143 _Py_SET_53BIT_PRECISION_END;
Eric Smith0923d1d2009-04-16 20:16:10 +0000144
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000145 if (*endptr == nptr)
146 /* string might represent an inf or nan */
147 result = _Py_parse_inf_or_nan(nptr, endptr);
Mark Dickinsonbd16edd2009-05-20 22:05:25 +0000148
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000149 return result;
Eric Smith0923d1d2009-04-16 20:16:10 +0000150
151}
152
153#else
154
/*
   Fallback _PyOS_ascii_strtod built on the system strtod.

   Since strtod is locale aware, the decimal separator may have to be fixed
   first: when the current locale's decimal point is not ".", the number is
   copied with its '.' replaced by the locale's decimal point and strtod is
   run on the copy.

   Note that unlike _Py_dg_strtod, the system strtod may not always give
   correctly rounded results.

   nptr must be non-NULL.  On success the converted value is returned and
   *endptr points just past the parsed text in nptr.  If the input is not a
   valid number, *endptr is set to nptr, errno is set to EINVAL and -1.0 is
   returned.  If the temporary copy cannot be allocated, *endptr is set to
   nptr and errno is set to ENOMEM.
*/

static double
_PyOS_ascii_strtod(const char *nptr, char **endptr)
{
    char *fail_pos;
    double val;
    struct lconv *locale_data;
    const char *decimal_point;
    size_t decimal_point_len;
    const char *p, *decimal_point_pos;
    const char *end = NULL; /* Silence gcc */
    const char *digits_pos = NULL;
    int negate = 0;

    assert(nptr != NULL);

    fail_pos = NULL;

    locale_data = localeconv();
    decimal_point = locale_data->decimal_point;
    decimal_point_len = strlen(decimal_point);

    assert(decimal_point_len != 0);

    decimal_point_pos = NULL;

    /* Parse infinities and nans */
    val = _Py_parse_inf_or_nan(nptr, endptr);
    if (*endptr != nptr)
        return val;

    /* Set errno to zero, so that we can distinguish zero results
       and underflows */
    errno = 0;

    /* We process the optional sign manually, then pass the remainder to
       the system strtod.  This ensures that the result of an underflow
       has the correct sign. (bug #1725) */
    p = nptr;
    /* Process leading sign, if present */
    if (*p == '-') {
        negate = 1;
        p++;
    }
    else if (*p == '+') {
        p++;
    }

    /* Some platform strtods accept hex floats; Python shouldn't (at the
       moment), so we check explicitly for strings starting with '0x'. */
    if (*p == '0' && (*(p+1) == 'x' || *(p+1) == 'X'))
        goto invalid_string;

    /* Check that what's left begins with a digit or decimal point */
    if (!Py_ISDIGIT(*p) && *p != '.')
        goto invalid_string;

    digits_pos = p;
    if (decimal_point[0] != '.' ||
        decimal_point[1] != 0)
    {
        /* Look for a '.' in the input; if present, it'll need to be
           swapped for the current locale's decimal point before we
           call strtod.  On the other hand, if we find the current
           locale's decimal point then the input is invalid. */
        while (Py_ISDIGIT(*p))
            p++;

        if (*p == '.')
        {
            decimal_point_pos = p++;

            /* locate end of number */
            while (Py_ISDIGIT(*p))
                p++;

            if (*p == 'e' || *p == 'E')
                p++;
            if (*p == '+' || *p == '-')
                p++;
            while (Py_ISDIGIT(*p))
                p++;
            end = p;
        }
        else if (strncmp(p, decimal_point, decimal_point_len) == 0)
            /* Python bug #1417699 */
            goto invalid_string;
        /* For the other cases, we need not convert the decimal
           point */
    }

    if (decimal_point_pos) {
        char *copy, *c;
        /* Create a copy of the input, with the '.' converted to the
           locale-specific decimal point */
        copy = (char *)PyMem_MALLOC(end - digits_pos +
                                    1 + decimal_point_len);
        if (copy == NULL) {
            *endptr = (char *)nptr;
            errno = ENOMEM;
            /* val still holds the failure value returned by
               _Py_parse_inf_or_nan above. */
            return val;
        }

        c = copy;
        memcpy(c, digits_pos, decimal_point_pos - digits_pos);
        c += decimal_point_pos - digits_pos;
        memcpy(c, decimal_point, decimal_point_len);
        c += decimal_point_len;
        memcpy(c, decimal_point_pos + 1,
               end - (decimal_point_pos + 1));
        c += end - (decimal_point_pos + 1);
        *c = 0;

        val = strtod(copy, &fail_pos);

        if (fail_pos)
        {
            /* fail_pos points into copy while decimal_point_pos points
               into the caller's string, so the two must be compared by
               offset from the start of the digits, never as raw
               pointers.  If strtod stopped beyond the start of the
               decimal point, undo the extra width of the locale's
               decimal point when mapping back to the input. */
            if (fail_pos - copy > decimal_point_pos - digits_pos)
                fail_pos = (char *)digits_pos +
                    (fail_pos - copy) -
                    (decimal_point_len - 1);
            else
                fail_pos = (char *)digits_pos +
                    (fail_pos - copy);
        }

        PyMem_FREE(copy);

    }
    else {
        val = strtod(digits_pos, &fail_pos);
    }

    /* strtod consumed nothing: not a number */
    if (fail_pos == digits_pos)
        goto invalid_string;

    if (negate && fail_pos != nptr)
        val = -val;
    *endptr = fail_pos;

    return val;

  invalid_string:
    *endptr = (char*)nptr;
    errno = EINVAL;
    return -1.0;
}
309
Eric Smith0923d1d2009-04-16 20:16:10 +0000310#endif
311
Eric Smith68af50b2010-02-22 14:58:30 +0000312/* PyOS_string_to_double converts a null-terminated byte string s (interpreted
313 as a string of ASCII characters) to a float. The string should not have
314 leading or trailing whitespace. The conversion is independent of the
315 current locale.
Mark Dickinson725bfd82009-05-03 20:33:40 +0000316
317 If endptr is NULL, try to convert the whole string. Raise ValueError and
318 return -1.0 if the string is not a valid representation of a floating-point
319 number.
320
321 If endptr is non-NULL, try to convert as much of the string as possible.
322 If no initial segment of the string is the valid representation of a
323 floating-point number then *endptr is set to point to the beginning of the
324 string, -1.0 is returned and again ValueError is raised.
325
326 On overflow (e.g., when trying to convert '1e500' on an IEEE 754 machine),
327 if overflow_exception is NULL then +-Py_HUGE_VAL is returned, and no Python
Serhiy Storchaka56a6d852014-12-01 18:28:43 +0200328 exception is raised. Otherwise, overflow_exception should point to
Mark Dickinson725bfd82009-05-03 20:33:40 +0000329 a Python exception, this exception will be raised, -1.0 will be returned,
330 and *endptr will point just past the end of the converted value.
331
332 If any other failure occurs (for example lack of memory), -1.0 is returned
333 and the appropriate Python exception will have been set.
334*/
335
336double
337PyOS_string_to_double(const char *s,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000338 char **endptr,
339 PyObject *overflow_exception)
Mark Dickinson725bfd82009-05-03 20:33:40 +0000340{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000341 double x, result=-1.0;
342 char *fail_pos;
Mark Dickinson725bfd82009-05-03 20:33:40 +0000343
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000344 errno = 0;
345 PyFPE_START_PROTECT("PyOS_string_to_double", return -1.0)
346 x = _PyOS_ascii_strtod(s, &fail_pos);
347 PyFPE_END_PROTECT(x)
Mark Dickinson725bfd82009-05-03 20:33:40 +0000348
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000349 if (errno == ENOMEM) {
350 PyErr_NoMemory();
351 fail_pos = (char *)s;
352 }
353 else if (!endptr && (fail_pos == s || *fail_pos != '\0'))
354 PyErr_Format(PyExc_ValueError,
355 "could not convert string to float: "
356 "%.200s", s);
357 else if (fail_pos == s)
358 PyErr_Format(PyExc_ValueError,
359 "could not convert string to float: "
360 "%.200s", s);
361 else if (errno == ERANGE && fabs(x) >= 1.0 && overflow_exception)
362 PyErr_Format(overflow_exception,
363 "value too large to convert to float: "
364 "%.200s", s);
365 else
366 result = x;
Mark Dickinson725bfd82009-05-03 20:33:40 +0000367
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000368 if (endptr != NULL)
369 *endptr = fail_pos;
370 return result;
Mark Dickinson725bfd82009-05-03 20:33:40 +0000371}
Eric Smith0923d1d2009-04-16 20:16:10 +0000372
Brett Cannona721aba2016-09-09 14:57:09 -0700373/* Remove underscores that follow the underscore placement rule from
374 the string and then call the `innerfunc` function on the result.
375 It should return a new object or NULL on exception.
376
377 `what` is used for the error message emitted when underscores are detected
378 that don't follow the rule. `arg` is an opaque pointer passed to the inner
379 function.
380
381 This is used to implement underscore-agnostic conversion for floats
382 and complex numbers.
383*/
384PyObject *
385_Py_string_to_number_with_underscores(
386 const char *s, Py_ssize_t orig_len, const char *what, PyObject *obj, void *arg,
387 PyObject *(*innerfunc)(const char *, Py_ssize_t, void *))
388{
389 char prev;
390 const char *p, *last;
391 char *dup, *end;
392 PyObject *result;
393
INADA Naoki16dfca42018-07-14 12:06:43 +0900394 assert(s[orig_len] == '\0');
395
Brett Cannona721aba2016-09-09 14:57:09 -0700396 if (strchr(s, '_') == NULL) {
397 return innerfunc(s, orig_len, arg);
398 }
399
400 dup = PyMem_Malloc(orig_len + 1);
401 end = dup;
402 prev = '\0';
403 last = s + orig_len;
404 for (p = s; *p; p++) {
405 if (*p == '_') {
406 /* Underscores are only allowed after digits. */
407 if (!(prev >= '0' && prev <= '9')) {
408 goto error;
409 }
410 }
411 else {
412 *end++ = *p;
413 /* Underscores are only allowed before digits. */
414 if (prev == '_' && !(*p >= '0' && *p <= '9')) {
415 goto error;
416 }
417 }
418 prev = *p;
419 }
420 /* Underscores are not allowed at the end. */
421 if (prev == '_') {
422 goto error;
423 }
424 /* No embedded NULs allowed. */
425 if (p != last) {
426 goto error;
427 }
428 *end = '\0';
429 result = innerfunc(dup, end - dup, arg);
430 PyMem_Free(dup);
431 return result;
432
433 error:
434 PyMem_Free(dup);
435 PyErr_Format(PyExc_ValueError,
Barry Warsawb2e57942017-09-14 18:13:16 -0700436 "could not convert string to %s: "
437 "%R", what, obj);
Brett Cannona721aba2016-09-09 14:57:09 -0700438 return NULL;
439}
440
Eric Smith68af50b2010-02-22 14:58:30 +0000441#ifdef PY_NO_SHORT_FLOAT_REPR
442
Eric Smithb2c7af82008-04-30 02:12:09 +0000443/* Given a string that may have a decimal point in the current
444 locale, change it back to a dot. Since the string cannot get
445 longer, no need for a maximum buffer size parameter. */
446Py_LOCAL_INLINE(void)
447change_decimal_from_locale_to_dot(char* buffer)
448{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000449 struct lconv *locale_data = localeconv();
450 const char *decimal_point = locale_data->decimal_point;
Eric Smithb2c7af82008-04-30 02:12:09 +0000451
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000452 if (decimal_point[0] != '.' || decimal_point[1] != 0) {
453 size_t decimal_point_len = strlen(decimal_point);
Eric Smithb2c7af82008-04-30 02:12:09 +0000454
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000455 if (*buffer == '+' || *buffer == '-')
456 buffer++;
457 while (Py_ISDIGIT(*buffer))
458 buffer++;
459 if (strncmp(buffer, decimal_point, decimal_point_len) == 0) {
460 *buffer = '.';
461 buffer++;
462 if (decimal_point_len > 1) {
463 /* buffer needs to get smaller */
464 size_t rest_len = strlen(buffer +
465 (decimal_point_len - 1));
466 memmove(buffer,
467 buffer + (decimal_point_len - 1),
468 rest_len);
469 buffer[rest_len] = 0;
470 }
471 }
472 }
Eric Smithb2c7af82008-04-30 02:12:09 +0000473}
474
Martin v. Löwis737ea822004-06-08 18:52:54 +0000475
Christian Heimesc3f30c42008-02-22 16:37:40 +0000476/* From the C99 standard, section 7.19.6:
477The exponent always contains at least two digits, and only as many more digits
478as necessary to represent the exponent.
479*/
480#define MIN_EXPONENT_DIGITS 2
481
Eric Smithb2c7af82008-04-30 02:12:09 +0000482/* Ensure that any exponent, if present, is at least MIN_EXPONENT_DIGITS
483 in length. */
484Py_LOCAL_INLINE(void)
Mark Dickinsonce95e562009-04-26 20:02:24 +0000485ensure_minimum_exponent_length(char* buffer, size_t buf_size)
Eric Smithb2c7af82008-04-30 02:12:09 +0000486{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000487 char *p = strpbrk(buffer, "eE");
488 if (p && (*(p + 1) == '-' || *(p + 1) == '+')) {
489 char *start = p + 2;
490 int exponent_digit_cnt = 0;
491 int leading_zero_cnt = 0;
492 int in_leading_zeros = 1;
493 int significant_digit_cnt;
Eric Smithb2c7af82008-04-30 02:12:09 +0000494
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000495 /* Skip over the exponent and the sign. */
496 p += 2;
Eric Smithb2c7af82008-04-30 02:12:09 +0000497
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000498 /* Find the end of the exponent, keeping track of leading
499 zeros. */
500 while (*p && Py_ISDIGIT(*p)) {
501 if (in_leading_zeros && *p == '0')
502 ++leading_zero_cnt;
503 if (*p != '0')
504 in_leading_zeros = 0;
505 ++p;
506 ++exponent_digit_cnt;
507 }
Eric Smithb2c7af82008-04-30 02:12:09 +0000508
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000509 significant_digit_cnt = exponent_digit_cnt - leading_zero_cnt;
510 if (exponent_digit_cnt == MIN_EXPONENT_DIGITS) {
511 /* If there are 2 exactly digits, we're done,
512 regardless of what they contain */
513 }
514 else if (exponent_digit_cnt > MIN_EXPONENT_DIGITS) {
515 int extra_zeros_cnt;
Eric Smithb2c7af82008-04-30 02:12:09 +0000516
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000517 /* There are more than 2 digits in the exponent. See
518 if we can delete some of the leading zeros */
519 if (significant_digit_cnt < MIN_EXPONENT_DIGITS)
520 significant_digit_cnt = MIN_EXPONENT_DIGITS;
521 extra_zeros_cnt = exponent_digit_cnt -
522 significant_digit_cnt;
Eric Smithb2c7af82008-04-30 02:12:09 +0000523
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000524 /* Delete extra_zeros_cnt worth of characters from the
525 front of the exponent */
526 assert(extra_zeros_cnt >= 0);
Eric Smithb2c7af82008-04-30 02:12:09 +0000527
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000528 /* Add one to significant_digit_cnt to copy the
529 trailing 0 byte, thus setting the length */
530 memmove(start,
531 start + extra_zeros_cnt,
532 significant_digit_cnt + 1);
533 }
534 else {
535 /* If there are fewer than 2 digits, add zeros
536 until there are 2, if there's enough room */
537 int zeros = MIN_EXPONENT_DIGITS - exponent_digit_cnt;
538 if (start + zeros + exponent_digit_cnt + 1
539 < buffer + buf_size) {
540 memmove(start + zeros, start,
541 exponent_digit_cnt + 1);
542 memset(start, '0', zeros);
543 }
544 }
545 }
Eric Smithb2c7af82008-04-30 02:12:09 +0000546}
547
Mark Dickinsond3ca5572009-04-29 18:47:07 +0000548/* Remove trailing zeros after the decimal point from a numeric string; also
549 remove the decimal point if all digits following it are zero. The numeric
550 string must end in '\0', and should not have any leading or trailing
551 whitespace. Assumes that the decimal point is '.'. */
Eric Smithb2c7af82008-04-30 02:12:09 +0000552Py_LOCAL_INLINE(void)
Mark Dickinsond3ca5572009-04-29 18:47:07 +0000553remove_trailing_zeros(char *buffer)
Eric Smithb2c7af82008-04-30 02:12:09 +0000554{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000555 char *old_fraction_end, *new_fraction_end, *end, *p;
Mark Dickinsond3ca5572009-04-29 18:47:07 +0000556
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000557 p = buffer;
558 if (*p == '-' || *p == '+')
559 /* Skip leading sign, if present */
560 ++p;
561 while (Py_ISDIGIT(*p))
562 ++p;
Mark Dickinsond3ca5572009-04-29 18:47:07 +0000563
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000564 /* if there's no decimal point there's nothing to do */
565 if (*p++ != '.')
566 return;
Mark Dickinsond3ca5572009-04-29 18:47:07 +0000567
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000568 /* scan any digits after the point */
569 while (Py_ISDIGIT(*p))
570 ++p;
571 old_fraction_end = p;
Mark Dickinsond3ca5572009-04-29 18:47:07 +0000572
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000573 /* scan up to ending '\0' */
574 while (*p != '\0')
575 p++;
576 /* +1 to make sure that we move the null byte as well */
577 end = p+1;
Mark Dickinsond3ca5572009-04-29 18:47:07 +0000578
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000579 /* scan back from fraction_end, looking for removable zeros */
580 p = old_fraction_end;
581 while (*(p-1) == '0')
582 --p;
583 /* and remove point if we've got that far */
584 if (*(p-1) == '.')
585 --p;
586 new_fraction_end = p;
Mark Dickinsond3ca5572009-04-29 18:47:07 +0000587
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000588 memmove(new_fraction_end, old_fraction_end, end-old_fraction_end);
Mark Dickinsond3ca5572009-04-29 18:47:07 +0000589}
590
591/* Ensure that buffer has a decimal point in it. The decimal point will not
592 be in the current locale, it will always be '.'. Don't add a decimal point
593 if an exponent is present. Also, convert to exponential notation where
594 adding a '.0' would produce too many significant digits (see issue 5864).
595
596 Returns a pointer to the fixed buffer, or NULL on failure.
597*/
598Py_LOCAL_INLINE(char *)
599ensure_decimal_point(char* buffer, size_t buf_size, int precision)
600{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000601 int digit_count, insert_count = 0, convert_to_exp = 0;
Serhiy Storchakae2f92de2017-11-11 13:06:26 +0200602 const char *chars_to_insert;
603 char *digits_start;
Eric Smithb2c7af82008-04-30 02:12:09 +0000604
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000605 /* search for the first non-digit character */
606 char *p = buffer;
607 if (*p == '-' || *p == '+')
608 /* Skip leading sign, if present. I think this could only
609 ever be '-', but it can't hurt to check for both. */
610 ++p;
611 digits_start = p;
612 while (*p && Py_ISDIGIT(*p))
613 ++p;
614 digit_count = Py_SAFE_DOWNCAST(p - digits_start, Py_ssize_t, int);
Eric Smithb2c7af82008-04-30 02:12:09 +0000615
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000616 if (*p == '.') {
617 if (Py_ISDIGIT(*(p+1))) {
618 /* Nothing to do, we already have a decimal
619 point and a digit after it */
620 }
621 else {
622 /* We have a decimal point, but no following
623 digit. Insert a zero after the decimal. */
624 /* can't ever get here via PyOS_double_to_string */
625 assert(precision == -1);
626 ++p;
627 chars_to_insert = "0";
628 insert_count = 1;
629 }
630 }
631 else if (!(*p == 'e' || *p == 'E')) {
632 /* Don't add ".0" if we have an exponent. */
633 if (digit_count == precision) {
634 /* issue 5864: don't add a trailing .0 in the case
635 where the '%g'-formatted result already has as many
636 significant digits as were requested. Switch to
637 exponential notation instead. */
638 convert_to_exp = 1;
639 /* no exponent, no point, and we shouldn't land here
640 for infs and nans, so we must be at the end of the
641 string. */
642 assert(*p == '\0');
643 }
644 else {
645 assert(precision == -1 || digit_count < precision);
646 chars_to_insert = ".0";
647 insert_count = 2;
648 }
649 }
650 if (insert_count) {
651 size_t buf_len = strlen(buffer);
652 if (buf_len + insert_count + 1 >= buf_size) {
653 /* If there is not enough room in the buffer
654 for the additional text, just skip it. It's
655 not worth generating an error over. */
656 }
657 else {
658 memmove(p + insert_count, p,
659 buffer + strlen(buffer) - p + 1);
660 memcpy(p, chars_to_insert, insert_count);
661 }
662 }
663 if (convert_to_exp) {
664 int written;
665 size_t buf_avail;
666 p = digits_start;
667 /* insert decimal point */
668 assert(digit_count >= 1);
669 memmove(p+2, p+1, digit_count); /* safe, but overwrites nul */
670 p[1] = '.';
671 p += digit_count+1;
672 assert(p <= buf_size+buffer);
673 buf_avail = buf_size+buffer-p;
674 if (buf_avail == 0)
675 return NULL;
676 /* Add exponent. It's okay to use lower case 'e': we only
677 arrive here as a result of using the empty format code or
678 repr/str builtins and those never want an upper case 'E' */
679 written = PyOS_snprintf(p, buf_avail, "e%+.02d", digit_count-1);
680 if (!(0 <= written &&
681 written < Py_SAFE_DOWNCAST(buf_avail, size_t, int)))
682 /* output truncated, or something else bad happened */
683 return NULL;
684 remove_trailing_zeros(buffer);
685 }
686 return buffer;
Eric Smithb2c7af82008-04-30 02:12:09 +0000687}
688
Christian Heimesc3f30c42008-02-22 16:37:40 +0000689/* see FORMATBUFLEN in unicodeobject.c */
690#define FLOAT_FORMATBUFLEN 120
691
Martin v. Löwis737ea822004-06-08 18:52:54 +0000692/**
Eric Smith68af50b2010-02-22 14:58:30 +0000693 * _PyOS_ascii_formatd:
Martin v. Löwis737ea822004-06-08 18:52:54 +0000694 * @buffer: A buffer to place the resulting string in
Christian Heimesb186d002008-03-18 15:15:01 +0000695 * @buf_size: The length of the buffer.
Martin v. Löwis737ea822004-06-08 18:52:54 +0000696 * @format: The printf()-style format to use for the
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000697 * code to use for converting.
Martin v. Löwis737ea822004-06-08 18:52:54 +0000698 * @d: The #gdouble to convert
Eric Smith68af50b2010-02-22 14:58:30 +0000699 * @precision: The precision to use when formatting.
Martin v. Löwis737ea822004-06-08 18:52:54 +0000700 *
701 * Converts a #gdouble to a string, using the '.' as
702 * decimal point. To format the number you pass in
703 * a printf()-style format string. Allowed conversion
Eric Smith0923d1d2009-04-16 20:16:10 +0000704 * specifiers are 'e', 'E', 'f', 'F', 'g', 'G', and 'Z'.
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000705 *
Christian Heimesb186d002008-03-18 15:15:01 +0000706 * 'Z' is the same as 'g', except it always has a decimal and
707 * at least one digit after the decimal.
Christian Heimesc3f30c42008-02-22 16:37:40 +0000708 *
Martin v. Löwis737ea822004-06-08 18:52:54 +0000709 * Return value: The pointer to the buffer with the converted string.
Mark Dickinsond3ca5572009-04-29 18:47:07 +0000710 * On failure returns NULL but does not set any Python exception.
Martin v. Löwis737ea822004-06-08 18:52:54 +0000711 **/
Eric Smith68af50b2010-02-22 14:58:30 +0000712static char *
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000713_PyOS_ascii_formatd(char *buffer,
714 size_t buf_size,
715 const char *format,
716 double d,
717 int precision)
Martin v. Löwis737ea822004-06-08 18:52:54 +0000718{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000719 char format_char;
720 size_t format_len = strlen(format);
Christian Heimesc3f30c42008-02-22 16:37:40 +0000721
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000722 /* Issue 2264: code 'Z' requires copying the format. 'Z' is 'g', but
723 also with at least one character past the decimal. */
724 char tmp_format[FLOAT_FORMATBUFLEN];
Martin v. Löwis737ea822004-06-08 18:52:54 +0000725
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000726 /* The last character in the format string must be the format char */
727 format_char = format[format_len - 1];
Martin v. Löwis737ea822004-06-08 18:52:54 +0000728
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000729 if (format[0] != '%')
730 return NULL;
Martin v. Löwis737ea822004-06-08 18:52:54 +0000731
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000732 /* I'm not sure why this test is here. It's ensuring that the format
733 string after the first character doesn't have a single quote, a
734 lowercase l, or a percent. This is the reverse of the commented-out
735 test about 10 lines ago. */
736 if (strpbrk(format + 1, "'l%"))
737 return NULL;
Martin v. Löwis737ea822004-06-08 18:52:54 +0000738
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000739 /* Also curious about this function is that it accepts format strings
740 like "%xg", which are invalid for floats. In general, the
741 interface to this function is not very good, but changing it is
742 difficult because it's a public API. */
Christian Heimesb186d002008-03-18 15:15:01 +0000743
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000744 if (!(format_char == 'e' || format_char == 'E' ||
745 format_char == 'f' || format_char == 'F' ||
746 format_char == 'g' || format_char == 'G' ||
747 format_char == 'Z'))
748 return NULL;
Martin v. Löwis737ea822004-06-08 18:52:54 +0000749
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000750 /* Map 'Z' format_char to 'g', by copying the format string and
751 replacing the final char with a 'g' */
752 if (format_char == 'Z') {
753 if (format_len + 1 >= sizeof(tmp_format)) {
754 /* The format won't fit in our copy. Error out. In
755 practice, this will never happen and will be
756 detected by returning NULL */
757 return NULL;
758 }
759 strcpy(tmp_format, format);
760 tmp_format[format_len - 1] = 'g';
761 format = tmp_format;
762 }
Martin v. Löwis737ea822004-06-08 18:52:54 +0000763
Christian Heimesb186d002008-03-18 15:15:01 +0000764
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000765 /* Have PyOS_snprintf do the hard work */
766 PyOS_snprintf(buffer, buf_size, format, d);
Martin v. Löwis737ea822004-06-08 18:52:54 +0000767
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000768 /* Do various fixups on the return string */
Martin v. Löwis737ea822004-06-08 18:52:54 +0000769
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000770 /* Get the current locale, and find the decimal point string.
771 Convert that string back to a dot. */
772 change_decimal_from_locale_to_dot(buffer);
Christian Heimesc3f30c42008-02-22 16:37:40 +0000773
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000774 /* If an exponent exists, ensure that the exponent is at least
775 MIN_EXPONENT_DIGITS digits, providing the buffer is large enough
776 for the extra zeros. Also, if there are more than
777 MIN_EXPONENT_DIGITS, remove as many zeros as possible until we get
778 back to MIN_EXPONENT_DIGITS */
779 ensure_minimum_exponent_length(buffer, buf_size);
Martin v. Löwis737ea822004-06-08 18:52:54 +0000780
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000781 /* If format_char is 'Z', make sure we have at least one character
782 after the decimal point (and make sure we have a decimal point);
783 also switch to exponential notation in some edge cases where the
784 extra character would produce more significant digits that we
785 really want. */
786 if (format_char == 'Z')
787 buffer = ensure_decimal_point(buffer, buf_size, precision);
Christian Heimesb186d002008-03-18 15:15:01 +0000788
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000789 return buffer;
Martin v. Löwis737ea822004-06-08 18:52:54 +0000790}
791
Eric Smith0923d1d2009-04-16 20:16:10 +0000792/* The fallback code to use if _Py_dg_dtoa is not available. */
793
794PyAPI_FUNC(char *) PyOS_double_to_string(double val,
795 char format_code,
796 int precision,
797 int flags,
798 int *type)
Martin v. Löwis737ea822004-06-08 18:52:54 +0000799{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000800 char format[32];
801 Py_ssize_t bufsize;
802 char *buf;
803 int t, exp;
804 int upper = 0;
Eric Smith0923d1d2009-04-16 20:16:10 +0000805
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000806 /* Validate format_code, and map upper and lower case */
807 switch (format_code) {
808 case 'e': /* exponent */
809 case 'f': /* fixed */
810 case 'g': /* general */
811 break;
812 case 'E':
813 upper = 1;
814 format_code = 'e';
815 break;
816 case 'F':
817 upper = 1;
818 format_code = 'f';
819 break;
820 case 'G':
821 upper = 1;
822 format_code = 'g';
823 break;
824 case 'r': /* repr format */
825 /* Supplied precision is unused, must be 0. */
826 if (precision != 0) {
827 PyErr_BadInternalCall();
828 return NULL;
829 }
830 /* The repr() precision (17 significant decimal digits) is the
831 minimal number that is guaranteed to have enough precision
832 so that if the number is read back in the exact same binary
833 value is recreated. This is true for IEEE floating point
834 by design, and also happens to work for all other modern
835 hardware. */
836 precision = 17;
837 format_code = 'g';
838 break;
839 default:
840 PyErr_BadInternalCall();
841 return NULL;
842 }
Eric Smith0923d1d2009-04-16 20:16:10 +0000843
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000844 /* Here's a quick-and-dirty calculation to figure out how big a buffer
845 we need. In general, for a finite float we need:
Mark Dickinsonf489caf2009-05-01 11:42:00 +0000846
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000847 1 byte for each digit of the decimal significand, and
Mark Dickinsonf489caf2009-05-01 11:42:00 +0000848
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000849 1 for a possible sign
850 1 for a possible decimal point
851 2 for a possible [eE][+-]
852 1 for each digit of the exponent; if we allow 19 digits
853 total then we're safe up to exponents of 2**63.
854 1 for the trailing nul byte
Mark Dickinsonf489caf2009-05-01 11:42:00 +0000855
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000856 This gives a total of 24 + the number of digits in the significand,
857 and the number of digits in the significand is:
Mark Dickinsonf489caf2009-05-01 11:42:00 +0000858
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000859 for 'g' format: at most precision, except possibly
860 when precision == 0, when it's 1.
861 for 'e' format: precision+1
862 for 'f' format: precision digits after the point, at least 1
863 before. To figure out how many digits appear before the point
864 we have to examine the size of the number. If fabs(val) < 1.0
865 then there will be only one digit before the point. If
866 fabs(val) >= 1.0, then there are at most
Mark Dickinsonf489caf2009-05-01 11:42:00 +0000867
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000868 1+floor(log10(ceiling(fabs(val))))
Mark Dickinsonf489caf2009-05-01 11:42:00 +0000869
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000870 digits before the point (where the 'ceiling' allows for the
871 possibility that the rounding rounds the integer part of val
872 up). A safe upper bound for the above quantity is
873 1+floor(exp/3), where exp is the unique integer such that 0.5
874 <= fabs(val)/2**exp < 1.0. This exp can be obtained from
875 frexp.
Mark Dickinsonf489caf2009-05-01 11:42:00 +0000876
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000877 So we allow room for precision+1 digits for all formats, plus an
878 extra floor(exp/3) digits for 'f' format.
Mark Dickinsonf489caf2009-05-01 11:42:00 +0000879
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000880 */
Mark Dickinsonf489caf2009-05-01 11:42:00 +0000881
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000882 if (Py_IS_NAN(val) || Py_IS_INFINITY(val))
883 /* 3 for 'inf'/'nan', 1 for sign, 1 for '\0' */
884 bufsize = 5;
885 else {
886 bufsize = 25 + precision;
887 if (format_code == 'f' && fabs(val) >= 1.0) {
888 frexp(val, &exp);
889 bufsize += exp/3;
890 }
891 }
Mark Dickinsonf489caf2009-05-01 11:42:00 +0000892
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000893 buf = PyMem_Malloc(bufsize);
894 if (buf == NULL) {
895 PyErr_NoMemory();
896 return NULL;
897 }
Mark Dickinsonf489caf2009-05-01 11:42:00 +0000898
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000899 /* Handle nan and inf. */
900 if (Py_IS_NAN(val)) {
901 strcpy(buf, "nan");
902 t = Py_DTST_NAN;
903 } else if (Py_IS_INFINITY(val)) {
904 if (copysign(1., val) == 1.)
905 strcpy(buf, "inf");
906 else
907 strcpy(buf, "-inf");
908 t = Py_DTST_INFINITE;
909 } else {
910 t = Py_DTST_FINITE;
911 if (flags & Py_DTSF_ADD_DOT_0)
912 format_code = 'Z';
Eric Smith0923d1d2009-04-16 20:16:10 +0000913
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000914 PyOS_snprintf(format, sizeof(format), "%%%s.%i%c",
915 (flags & Py_DTSF_ALT ? "#" : ""), precision,
916 format_code);
917 _PyOS_ascii_formatd(buf, bufsize, format, val, precision);
918 }
Eric Smith0923d1d2009-04-16 20:16:10 +0000919
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000920 /* Add sign when requested. It's convenient (esp. when formatting
921 complex numbers) to include a sign even for inf and nan. */
922 if (flags & Py_DTSF_SIGN && buf[0] != '-') {
923 size_t len = strlen(buf);
924 /* the bufsize calculations above should ensure that we've got
925 space to add a sign */
926 assert((size_t)bufsize >= len+2);
927 memmove(buf+1, buf, len+1);
928 buf[0] = '+';
929 }
930 if (upper) {
931 /* Convert to upper case. */
932 char *p1;
933 for (p1 = buf; *p1; p1++)
934 *p1 = Py_TOUPPER(*p1);
935 }
Eric Smith0923d1d2009-04-16 20:16:10 +0000936
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000937 if (type)
938 *type = t;
939 return buf;
Martin v. Löwis737ea822004-06-08 18:52:54 +0000940}
Eric Smith0923d1d2009-04-16 20:16:10 +0000941
942#else
943
944/* _Py_dg_dtoa is available. */
945
/* Lower- and upper-case spellings of the literal pieces of float output.
   Two lookup tables are used so that no non-locale-specific way of
   converting to uppercase has to be invented.  OFS_* are the indices
   into either table. */
#define OFS_INF 0
#define OFS_NAN 1
#define OFS_E 2

/* The lengths of these are known to the code below, so don't change them */
static const char * const lc_float_strings[] = {
    [OFS_INF] = "inf",
    [OFS_NAN] = "nan",
    [OFS_E] = "e",
};
static const char * const uc_float_strings[] = {
    [OFS_INF] = "INF",
    [OFS_NAN] = "NAN",
    [OFS_E] = "E",
};
963
964
965/* Convert a double d to a string, and return a PyMem_Malloc'd block of
966 memory contain the resulting string.
967
968 Arguments:
969 d is the double to be converted
Eric Smith63376222009-05-05 14:04:18 +0000970 format_code is one of 'e', 'f', 'g', 'r'. 'e', 'f' and 'g'
971 correspond to '%e', '%f' and '%g'; 'r' corresponds to repr.
Eric Smith0923d1d2009-04-16 20:16:10 +0000972 mode is one of '0', '2' or '3', and is completely determined by
Eric Smith63376222009-05-05 14:04:18 +0000973 format_code: 'e' and 'g' use mode 2; 'f' mode 3, 'r' mode 0.
Eric Smith0923d1d2009-04-16 20:16:10 +0000974 precision is the desired precision
975 always_add_sign is nonzero if a '+' sign should be included for positive
976 numbers
977 add_dot_0_if_integer is nonzero if integers in non-exponential form
Eric Smith63376222009-05-05 14:04:18 +0000978 should have ".0" added. Only applies to format codes 'r' and 'g'.
Eric Smith0923d1d2009-04-16 20:16:10 +0000979 use_alt_formatting is nonzero if alternative formatting should be
Eric Smith63376222009-05-05 14:04:18 +0000980 used. Only applies to format codes 'e', 'f' and 'g'. For code 'g',
981 at most one of use_alt_formatting and add_dot_0_if_integer should
982 be nonzero.
Eric Smith0923d1d2009-04-16 20:16:10 +0000983 type, if non-NULL, will be set to one of these constants to identify
984 the type of the 'd' argument:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000985 Py_DTST_FINITE
986 Py_DTST_INFINITE
987 Py_DTST_NAN
Eric Smith0923d1d2009-04-16 20:16:10 +0000988
989 Returns a PyMem_Malloc'd block of memory containing the resulting string,
990 or NULL on error. If NULL is returned, the Python error has been set.
991 */
992
993static char *
994format_float_short(double d, char format_code,
Victor Stinner7b251352013-06-24 23:37:40 +0200995 int mode, int precision,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000996 int always_add_sign, int add_dot_0_if_integer,
Serhiy Storchaka2d06e842015-12-25 19:53:18 +0200997 int use_alt_formatting, const char * const *float_strings,
998 int *type)
Eric Smith0923d1d2009-04-16 20:16:10 +0000999{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001000 char *buf = NULL;
1001 char *p = NULL;
1002 Py_ssize_t bufsize = 0;
1003 char *digits, *digits_end;
1004 int decpt_as_int, sign, exp_len, exp = 0, use_exp = 0;
1005 Py_ssize_t decpt, digits_len, vdigits_start, vdigits_end;
1006 _Py_SET_53BIT_PRECISION_HEADER;
Eric Smith0923d1d2009-04-16 20:16:10 +00001007
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001008 /* _Py_dg_dtoa returns a digit string (no decimal point or exponent).
1009 Must be matched by a call to _Py_dg_freedtoa. */
1010 _Py_SET_53BIT_PRECISION_START;
1011 digits = _Py_dg_dtoa(d, mode, precision, &decpt_as_int, &sign,
1012 &digits_end);
1013 _Py_SET_53BIT_PRECISION_END;
Eric Smith0923d1d2009-04-16 20:16:10 +00001014
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001015 decpt = (Py_ssize_t)decpt_as_int;
1016 if (digits == NULL) {
1017 /* The only failure mode is no memory. */
1018 PyErr_NoMemory();
1019 goto exit;
1020 }
1021 assert(digits_end != NULL && digits_end >= digits);
1022 digits_len = digits_end - digits;
Eric Smith0923d1d2009-04-16 20:16:10 +00001023
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001024 if (digits_len && !Py_ISDIGIT(digits[0])) {
1025 /* Infinities and nans here; adapt Gay's output,
1026 so convert Infinity to inf and NaN to nan, and
1027 ignore sign of nan. Then return. */
Eric Smith0923d1d2009-04-16 20:16:10 +00001028
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001029 /* ignore the actual sign of a nan */
1030 if (digits[0] == 'n' || digits[0] == 'N')
1031 sign = 0;
Mark Dickinsonad476da2009-04-23 19:14:16 +00001032
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001033 /* We only need 5 bytes to hold the result "+inf\0" . */
1034 bufsize = 5; /* Used later in an assert. */
1035 buf = (char *)PyMem_Malloc(bufsize);
1036 if (buf == NULL) {
1037 PyErr_NoMemory();
1038 goto exit;
1039 }
1040 p = buf;
Eric Smith0923d1d2009-04-16 20:16:10 +00001041
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001042 if (sign == 1) {
1043 *p++ = '-';
1044 }
1045 else if (always_add_sign) {
1046 *p++ = '+';
1047 }
1048 if (digits[0] == 'i' || digits[0] == 'I') {
1049 strncpy(p, float_strings[OFS_INF], 3);
1050 p += 3;
Eric Smith0923d1d2009-04-16 20:16:10 +00001051
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001052 if (type)
1053 *type = Py_DTST_INFINITE;
1054 }
1055 else if (digits[0] == 'n' || digits[0] == 'N') {
1056 strncpy(p, float_strings[OFS_NAN], 3);
1057 p += 3;
Eric Smith0923d1d2009-04-16 20:16:10 +00001058
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001059 if (type)
1060 *type = Py_DTST_NAN;
1061 }
1062 else {
1063 /* shouldn't get here: Gay's code should always return
1064 something starting with a digit, an 'I', or 'N' */
Barry Warsawb2e57942017-09-14 18:13:16 -07001065 Py_UNREACHABLE();
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001066 }
1067 goto exit;
1068 }
Eric Smith0923d1d2009-04-16 20:16:10 +00001069
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001070 /* The result must be finite (not inf or nan). */
1071 if (type)
1072 *type = Py_DTST_FINITE;
Eric Smith0923d1d2009-04-16 20:16:10 +00001073
1074
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001075 /* We got digits back, format them. We may need to pad 'digits'
1076 either on the left or right (or both) with extra zeros, so in
1077 general the resulting string has the form
Eric Smith0923d1d2009-04-16 20:16:10 +00001078
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001079 [<sign>]<zeros><digits><zeros>[<exponent>]
Eric Smith0923d1d2009-04-16 20:16:10 +00001080
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001081 where either of the <zeros> pieces could be empty, and there's a
1082 decimal point that could appear either in <digits> or in the
1083 leading or trailing <zeros>.
Eric Smith0923d1d2009-04-16 20:16:10 +00001084
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001085 Imagine an infinite 'virtual' string vdigits, consisting of the
1086 string 'digits' (starting at index 0) padded on both the left and
1087 right with infinite strings of zeros. We want to output a slice
Eric Smith0923d1d2009-04-16 20:16:10 +00001088
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001089 vdigits[vdigits_start : vdigits_end]
Eric Smith0923d1d2009-04-16 20:16:10 +00001090
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001091 of this virtual string. Thus if vdigits_start < 0 then we'll end
1092 up producing some leading zeros; if vdigits_end > digits_len there
1093 will be trailing zeros in the output. The next section of code
1094 determines whether to use an exponent or not, figures out the
1095 position 'decpt' of the decimal point, and computes 'vdigits_start'
1096 and 'vdigits_end'. */
1097 vdigits_end = digits_len;
1098 switch (format_code) {
1099 case 'e':
1100 use_exp = 1;
1101 vdigits_end = precision;
1102 break;
1103 case 'f':
1104 vdigits_end = decpt + precision;
1105 break;
1106 case 'g':
1107 if (decpt <= -4 || decpt >
1108 (add_dot_0_if_integer ? precision-1 : precision))
1109 use_exp = 1;
1110 if (use_alt_formatting)
1111 vdigits_end = precision;
1112 break;
1113 case 'r':
1114 /* convert to exponential format at 1e16. We used to convert
1115 at 1e17, but that gives odd-looking results for some values
1116 when a 16-digit 'shortest' repr is padded with bogus zeros.
1117 For example, repr(2e16+8) would give 20000000000000010.0;
1118 the true value is 20000000000000008.0. */
1119 if (decpt <= -4 || decpt > 16)
1120 use_exp = 1;
1121 break;
1122 default:
1123 PyErr_BadInternalCall();
1124 goto exit;
1125 }
Eric Smith0923d1d2009-04-16 20:16:10 +00001126
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001127 /* if using an exponent, reset decimal point position to 1 and adjust
1128 exponent accordingly.*/
1129 if (use_exp) {
Victor Stinner7b251352013-06-24 23:37:40 +02001130 exp = (int)decpt - 1;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001131 decpt = 1;
1132 }
1133 /* ensure vdigits_start < decpt <= vdigits_end, or vdigits_start <
1134 decpt < vdigits_end if add_dot_0_if_integer and no exponent */
1135 vdigits_start = decpt <= 0 ? decpt-1 : 0;
1136 if (!use_exp && add_dot_0_if_integer)
1137 vdigits_end = vdigits_end > decpt ? vdigits_end : decpt + 1;
1138 else
1139 vdigits_end = vdigits_end > decpt ? vdigits_end : decpt;
Eric Smith0923d1d2009-04-16 20:16:10 +00001140
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001141 /* double check inequalities */
1142 assert(vdigits_start <= 0 &&
1143 0 <= digits_len &&
1144 digits_len <= vdigits_end);
1145 /* decimal point should be in (vdigits_start, vdigits_end] */
1146 assert(vdigits_start < decpt && decpt <= vdigits_end);
Eric Smith0923d1d2009-04-16 20:16:10 +00001147
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001148 /* Compute an upper bound how much memory we need. This might be a few
1149 chars too long, but no big deal. */
1150 bufsize =
1151 /* sign, decimal point and trailing 0 byte */
1152 3 +
Eric Smith0923d1d2009-04-16 20:16:10 +00001153
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001154 /* total digit count (including zero padding on both sides) */
1155 (vdigits_end - vdigits_start) +
Eric Smith0923d1d2009-04-16 20:16:10 +00001156
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001157 /* exponent "e+100", max 3 numerical digits */
1158 (use_exp ? 5 : 0);
Eric Smith0923d1d2009-04-16 20:16:10 +00001159
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001160 /* Now allocate the memory and initialize p to point to the start of
1161 it. */
1162 buf = (char *)PyMem_Malloc(bufsize);
1163 if (buf == NULL) {
1164 PyErr_NoMemory();
1165 goto exit;
1166 }
1167 p = buf;
Eric Smith0923d1d2009-04-16 20:16:10 +00001168
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001169 /* Add a negative sign if negative, and a plus sign if non-negative
1170 and always_add_sign is true. */
1171 if (sign == 1)
1172 *p++ = '-';
1173 else if (always_add_sign)
1174 *p++ = '+';
Eric Smith0923d1d2009-04-16 20:16:10 +00001175
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001176 /* note that exactly one of the three 'if' conditions is true,
1177 so we include exactly one decimal point */
1178 /* Zero padding on left of digit string */
1179 if (decpt <= 0) {
1180 memset(p, '0', decpt-vdigits_start);
1181 p += decpt - vdigits_start;
1182 *p++ = '.';
1183 memset(p, '0', 0-decpt);
1184 p += 0-decpt;
1185 }
1186 else {
1187 memset(p, '0', 0-vdigits_start);
1188 p += 0 - vdigits_start;
1189 }
Eric Smith0923d1d2009-04-16 20:16:10 +00001190
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001191 /* Digits, with included decimal point */
1192 if (0 < decpt && decpt <= digits_len) {
1193 strncpy(p, digits, decpt-0);
1194 p += decpt-0;
1195 *p++ = '.';
1196 strncpy(p, digits+decpt, digits_len-decpt);
1197 p += digits_len-decpt;
1198 }
1199 else {
1200 strncpy(p, digits, digits_len);
1201 p += digits_len;
1202 }
Eric Smith0923d1d2009-04-16 20:16:10 +00001203
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001204 /* And zeros on the right */
1205 if (digits_len < decpt) {
1206 memset(p, '0', decpt-digits_len);
1207 p += decpt-digits_len;
1208 *p++ = '.';
1209 memset(p, '0', vdigits_end-decpt);
1210 p += vdigits_end-decpt;
1211 }
1212 else {
1213 memset(p, '0', vdigits_end-digits_len);
1214 p += vdigits_end-digits_len;
1215 }
Eric Smith0923d1d2009-04-16 20:16:10 +00001216
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001217 /* Delete a trailing decimal pt unless using alternative formatting. */
1218 if (p[-1] == '.' && !use_alt_formatting)
1219 p--;
Eric Smith0923d1d2009-04-16 20:16:10 +00001220
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001221 /* Now that we've done zero padding, add an exponent if needed. */
1222 if (use_exp) {
1223 *p++ = float_strings[OFS_E][0];
1224 exp_len = sprintf(p, "%+.02d", exp);
1225 p += exp_len;
1226 }
Eric Smith0923d1d2009-04-16 20:16:10 +00001227 exit:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001228 if (buf) {
1229 *p = '\0';
1230 /* It's too late if this fails, as we've already stepped on
1231 memory that isn't ours. But it's an okay debugging test. */
1232 assert(p-buf < bufsize);
1233 }
1234 if (digits)
1235 _Py_dg_freedtoa(digits);
Eric Smith0923d1d2009-04-16 20:16:10 +00001236
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001237 return buf;
Eric Smith0923d1d2009-04-16 20:16:10 +00001238}
1239
1240
1241PyAPI_FUNC(char *) PyOS_double_to_string(double val,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001242 char format_code,
1243 int precision,
1244 int flags,
1245 int *type)
Eric Smith0923d1d2009-04-16 20:16:10 +00001246{
Serhiy Storchaka2d06e842015-12-25 19:53:18 +02001247 const char * const *float_strings = lc_float_strings;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001248 int mode;
Eric Smith0923d1d2009-04-16 20:16:10 +00001249
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001250 /* Validate format_code, and map upper and lower case. Compute the
1251 mode and make any adjustments as needed. */
1252 switch (format_code) {
1253 /* exponent */
1254 case 'E':
1255 float_strings = uc_float_strings;
1256 format_code = 'e';
1257 /* Fall through. */
1258 case 'e':
1259 mode = 2;
1260 precision++;
1261 break;
Eric Smith193125a2009-04-16 22:08:31 +00001262
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001263 /* fixed */
1264 case 'F':
1265 float_strings = uc_float_strings;
1266 format_code = 'f';
1267 /* Fall through. */
1268 case 'f':
1269 mode = 3;
1270 break;
Eric Smith193125a2009-04-16 22:08:31 +00001271
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001272 /* general */
1273 case 'G':
1274 float_strings = uc_float_strings;
1275 format_code = 'g';
1276 /* Fall through. */
1277 case 'g':
1278 mode = 2;
1279 /* precision 0 makes no sense for 'g' format; interpret as 1 */
1280 if (precision == 0)
1281 precision = 1;
1282 break;
Eric Smith193125a2009-04-16 22:08:31 +00001283
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001284 /* repr format */
1285 case 'r':
1286 mode = 0;
1287 /* Supplied precision is unused, must be 0. */
1288 if (precision != 0) {
1289 PyErr_BadInternalCall();
1290 return NULL;
1291 }
1292 break;
Eric Smith193125a2009-04-16 22:08:31 +00001293
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001294 default:
1295 PyErr_BadInternalCall();
1296 return NULL;
1297 }
Eric Smith0923d1d2009-04-16 20:16:10 +00001298
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001299 return format_float_short(val, format_code, mode, precision,
1300 flags & Py_DTSF_SIGN,
1301 flags & Py_DTSF_ADD_DOT_0,
1302 flags & Py_DTSF_ALT,
1303 float_strings, type);
Eric Smith0923d1d2009-04-16 20:16:10 +00001304}
1305#endif /* ifdef PY_NO_SHORT_FLOAT_REPR */