blob: 209c9086c87b9c061cf015ea371275f9be12f795 [file] [log] [blame]
Martin v. Löwis737ea822004-06-08 18:52:54 +00001/* -*- Mode: C; c-file-style: "python" -*- */
2
3#include <Python.h>
4#include <locale.h>
5
/* Case-insensitive prefix match used for nan and inf detection.  t is the
   pattern and must already be lower-case; each character of s is lowered
   with Py_TOLOWER before it is compared.  Only as many characters of s as t
   contains are examined, so s may continue past the match.  Returns 1 when
   all of t was matched, 0 otherwise. */

static int
case_insensitive_match(const char *s, const char *t)
{
    for (; *t != '\0'; s++, t++) {
        /* A NUL in s can never equal a non-NUL pattern character, so a
           short s is rejected here without reading past its end. */
        if (Py_TOLOWER(*s) != *t)
            return 0;
    }
    return 1;
}
18
/* _Py_parse_inf_or_nan: Attempt to parse a string of the form "nan", "inf" or
   "infinity", with an optional leading sign of "+" or "-".  Matching is
   case-insensitive.  On success, return the NaN or Infinity as a double and
   set *endptr to point just beyond the successfully parsed portion of the
   string.  On failure, return -1.0 and set *endptr to point to the start of
   the string. */
24
Mark Dickinsone383e822012-04-29 15:31:56 +010025#ifndef PY_NO_SHORT_FLOAT_REPR
26
/* Variant compiled when PY_NO_SHORT_FLOAT_REPR is not defined: the special
   values are produced by _Py_dg_infinity() and _Py_dg_stdnan(). */
double
_Py_parse_inf_or_nan(const char *p, char **endptr)
{
    const char *cursor = p;
    int is_negative = (*cursor == '-');
    double value;

    /* Step over an optional leading sign. */
    if (is_negative || *cursor == '+')
        cursor++;

    if (case_insensitive_match(cursor, "inf")) {
        cursor += 3;
        /* Accept the long spelling "infinity" as well. */
        if (case_insensitive_match(cursor, "inity"))
            cursor += 5;
        value = _Py_dg_infinity(is_negative);
    }
    else if (case_insensitive_match(cursor, "nan")) {
        cursor += 3;
        value = _Py_dg_stdnan(is_negative);
    }
    else {
        /* Nothing recognised: report failure at the start of the input. */
        cursor = p;
        value = -1.0;
    }

    *endptr = (char *)cursor;
    return value;
}
59
60#else
61
Mark Dickinsonbd16edd2009-05-20 22:05:25 +000062double
63_Py_parse_inf_or_nan(const char *p, char **endptr)
64{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000065 double retval;
66 const char *s;
67 int negate = 0;
Mark Dickinsonbd16edd2009-05-20 22:05:25 +000068
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000069 s = p;
70 if (*s == '-') {
71 negate = 1;
72 s++;
73 }
74 else if (*s == '+') {
75 s++;
76 }
77 if (case_insensitive_match(s, "inf")) {
78 s += 3;
79 if (case_insensitive_match(s, "inity"))
80 s += 5;
81 retval = negate ? -Py_HUGE_VAL : Py_HUGE_VAL;
82 }
Mark Dickinsonbd16edd2009-05-20 22:05:25 +000083#ifdef Py_NAN
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000084 else if (case_insensitive_match(s, "nan")) {
85 s += 3;
86 retval = negate ? -Py_NAN : Py_NAN;
87 }
Mark Dickinsonbd16edd2009-05-20 22:05:25 +000088#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000089 else {
90 s = p;
91 retval = -1.0;
92 }
93 *endptr = (char *)s;
94 return retval;
Mark Dickinsonbd16edd2009-05-20 22:05:25 +000095}
96
Mark Dickinsone383e822012-04-29 15:31:56 +010097#endif
98
/**
 * _PyOS_ascii_strtod:
 * @nptr:    the string to convert to a numeric value.
 * @endptr:  receives a pointer to the character after the last character
 *           used in the conversion.  It is dereferenced unconditionally,
 *           so it must not be NULL.
 *
 * Converts a string to a double value.
 * This function behaves like the standard strtod() function
 * does in the C locale.  It does this without actually
 * changing the current locale, since that would not be
 * thread-safe.
 *
 * This function is typically used when reading configuration
 * files or other non-user input that should be locale independent.
 * To handle input from the user you should normally use the
 * locale-sensitive system strtod() function.
 *
 * If the correct value would cause overflow, plus or minus HUGE_VAL
 * is returned (according to the sign of the value), and ERANGE is
 * stored in errno.  If the correct value would cause underflow,
 * zero is returned and ERANGE is stored in errno.
 * If memory allocation fails, ENOMEM is stored in errno.
 *
 * This function resets errno before calling strtod() so that
 * you can reliably detect overflow and underflow.
 *
 * Return value: the double value.
 **/
Eric Smith0923d1d2009-04-16 20:16:10 +0000127
128#ifndef PY_NO_SHORT_FLOAT_REPR
129
Eric Smith68af50b2010-02-22 14:58:30 +0000130static double
Mark Dickinson725bfd82009-05-03 20:33:40 +0000131_PyOS_ascii_strtod(const char *nptr, char **endptr)
Eric Smith0923d1d2009-04-16 20:16:10 +0000132{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000133 double result;
134 _Py_SET_53BIT_PRECISION_HEADER;
Eric Smith0923d1d2009-04-16 20:16:10 +0000135
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000136 assert(nptr != NULL);
137 /* Set errno to zero, so that we can distinguish zero results
138 and underflows */
139 errno = 0;
Eric Smith0923d1d2009-04-16 20:16:10 +0000140
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000141 _Py_SET_53BIT_PRECISION_START;
142 result = _Py_dg_strtod(nptr, endptr);
143 _Py_SET_53BIT_PRECISION_END;
Eric Smith0923d1d2009-04-16 20:16:10 +0000144
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000145 if (*endptr == nptr)
146 /* string might represent an inf or nan */
147 result = _Py_parse_inf_or_nan(nptr, endptr);
Mark Dickinsonbd16edd2009-05-20 22:05:25 +0000148
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000149 return result;
Eric Smith0923d1d2009-04-16 20:16:10 +0000150
151}
152
153#else
154
/*
   Use system strtod; since strtod is locale aware, we may
   have to first fix the decimal separator.

   Note that unlike _Py_dg_strtod, the system strtod may not always give
   correctly rounded results.

   On return *endptr points just past the text that was converted.  If no
   number could be parsed, *endptr is set to nptr, errno is set to EINVAL and
   -1.0 is returned.  If the temporary copy of the input cannot be allocated,
   *endptr is set to nptr and errno is set to ENOMEM.
*/

static double
_PyOS_ascii_strtod(const char *nptr, char **endptr)
{
    char *fail_pos;
    double val;
    struct lconv *locale_data;
    const char *decimal_point;
    size_t decimal_point_len;
    const char *p, *decimal_point_pos;
    const char *end = NULL; /* Silence gcc */
    const char *digits_pos = NULL;
    int negate = 0;

    assert(nptr != NULL);

    fail_pos = NULL;

    locale_data = localeconv();
    decimal_point = locale_data->decimal_point;
    decimal_point_len = strlen(decimal_point);

    assert(decimal_point_len != 0);

    decimal_point_pos = NULL;

    /* Parse infinities and nans */
    val = _Py_parse_inf_or_nan(nptr, endptr);
    if (*endptr != nptr)
        return val;

    /* Set errno to zero, so that we can distinguish zero results
       and underflows */
    errno = 0;

    /* We process the optional sign manually, then pass the remainder to
       the system strtod.  This ensures that the result of an underflow
       has the correct sign. (bug #1725) */
    p = nptr;
    /* Process leading sign, if present */
    if (*p == '-') {
        negate = 1;
        p++;
    }
    else if (*p == '+') {
        p++;
    }

    /* Some platform strtods accept hex floats; Python shouldn't (at the
       moment), so we check explicitly for strings starting with '0x'. */
    if (*p == '0' && (*(p+1) == 'x' || *(p+1) == 'X'))
        goto invalid_string;

    /* Check that what's left begins with a digit or decimal point */
    if (!Py_ISDIGIT(*p) && *p != '.')
        goto invalid_string;

    digits_pos = p;
    if (decimal_point[0] != '.' ||
        decimal_point[1] != 0)
    {
        /* Look for a '.' in the input; if present, it'll need to be
           swapped for the current locale's decimal point before we
           call strtod.  On the other hand, if we find the current
           locale's decimal point then the input is invalid. */
        while (Py_ISDIGIT(*p))
            p++;

        if (*p == '.')
        {
            decimal_point_pos = p++;

            /* locate end of number */
            while (Py_ISDIGIT(*p))
                p++;

            if (*p == 'e' || *p == 'E')
                p++;
            if (*p == '+' || *p == '-')
                p++;
            while (Py_ISDIGIT(*p))
                p++;
            end = p;
        }
        else if (strncmp(p, decimal_point, decimal_point_len) == 0)
            /* Python bug #1417699 */
            goto invalid_string;
        /* For the other cases, we need not convert the decimal
           point */
    }

    if (decimal_point_pos) {
        char *copy, *c;
        /* Create a copy of the input, with the '.' converted to the
           locale-specific decimal point */
        copy = (char *)PyMem_MALLOC(end - digits_pos +
                                    1 + decimal_point_len);
        if (copy == NULL) {
            *endptr = (char *)nptr;
            errno = ENOMEM;
            /* val still holds the -1.0 failure value returned by
               _Py_parse_inf_or_nan above. */
            return val;
        }

        c = copy;
        memcpy(c, digits_pos, decimal_point_pos - digits_pos);
        c += decimal_point_pos - digits_pos;
        memcpy(c, decimal_point, decimal_point_len);
        c += decimal_point_len;
        memcpy(c, decimal_point_pos + 1,
               end - (decimal_point_pos + 1));
        c += end - (decimal_point_pos + 1);
        *c = 0;

        val = strtod(copy, &fail_pos);

        if (fail_pos)
        {
            /* Translate the stop position from the copy back into the
               caller's string.  fail_pos points into 'copy' while
               decimal_point_pos points into the input, so the two must be
               compared as offsets from their respective starts; comparing
               the raw pointers is undefined and picks an arbitrary branch.
               If strtod went past the separator, the copy is
               (decimal_point_len - 1) bytes longer than the input up to
               that point. */
            if (fail_pos - copy > decimal_point_pos - digits_pos)
                fail_pos = (char *)digits_pos +
                    (fail_pos - copy) -
                    (decimal_point_len - 1);
            else
                fail_pos = (char *)digits_pos +
                    (fail_pos - copy);
        }

        PyMem_FREE(copy);

    }
    else {
        val = strtod(digits_pos, &fail_pos);
    }

    if (fail_pos == digits_pos)
        goto invalid_string;

    if (negate && fail_pos != nptr)
        val = -val;
    *endptr = fail_pos;

    return val;

  invalid_string:
    *endptr = (char*)nptr;
    errno = EINVAL;
    return -1.0;
}
309
Eric Smith0923d1d2009-04-16 20:16:10 +0000310#endif
311
Eric Smith68af50b2010-02-22 14:58:30 +0000312/* PyOS_string_to_double converts a null-terminated byte string s (interpreted
313 as a string of ASCII characters) to a float. The string should not have
314 leading or trailing whitespace. The conversion is independent of the
315 current locale.
Mark Dickinson725bfd82009-05-03 20:33:40 +0000316
317 If endptr is NULL, try to convert the whole string. Raise ValueError and
318 return -1.0 if the string is not a valid representation of a floating-point
319 number.
320
321 If endptr is non-NULL, try to convert as much of the string as possible.
322 If no initial segment of the string is the valid representation of a
323 floating-point number then *endptr is set to point to the beginning of the
324 string, -1.0 is returned and again ValueError is raised.
325
326 On overflow (e.g., when trying to convert '1e500' on an IEEE 754 machine),
327 if overflow_exception is NULL then +-Py_HUGE_VAL is returned, and no Python
Serhiy Storchaka56a6d852014-12-01 18:28:43 +0200328 exception is raised. Otherwise, overflow_exception should point to
Mark Dickinson725bfd82009-05-03 20:33:40 +0000329 a Python exception, this exception will be raised, -1.0 will be returned,
330 and *endptr will point just past the end of the converted value.
331
332 If any other failure occurs (for example lack of memory), -1.0 is returned
333 and the appropriate Python exception will have been set.
334*/
335
336double
337PyOS_string_to_double(const char *s,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000338 char **endptr,
339 PyObject *overflow_exception)
Mark Dickinson725bfd82009-05-03 20:33:40 +0000340{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000341 double x, result=-1.0;
342 char *fail_pos;
Mark Dickinson725bfd82009-05-03 20:33:40 +0000343
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000344 errno = 0;
345 PyFPE_START_PROTECT("PyOS_string_to_double", return -1.0)
346 x = _PyOS_ascii_strtod(s, &fail_pos);
347 PyFPE_END_PROTECT(x)
Mark Dickinson725bfd82009-05-03 20:33:40 +0000348
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000349 if (errno == ENOMEM) {
350 PyErr_NoMemory();
351 fail_pos = (char *)s;
352 }
353 else if (!endptr && (fail_pos == s || *fail_pos != '\0'))
354 PyErr_Format(PyExc_ValueError,
355 "could not convert string to float: "
356 "%.200s", s);
357 else if (fail_pos == s)
358 PyErr_Format(PyExc_ValueError,
359 "could not convert string to float: "
360 "%.200s", s);
361 else if (errno == ERANGE && fabs(x) >= 1.0 && overflow_exception)
362 PyErr_Format(overflow_exception,
363 "value too large to convert to float: "
364 "%.200s", s);
365 else
366 result = x;
Mark Dickinson725bfd82009-05-03 20:33:40 +0000367
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000368 if (endptr != NULL)
369 *endptr = fail_pos;
370 return result;
Mark Dickinson725bfd82009-05-03 20:33:40 +0000371}
Eric Smith0923d1d2009-04-16 20:16:10 +0000372
Eric Smith68af50b2010-02-22 14:58:30 +0000373#ifdef PY_NO_SHORT_FLOAT_REPR
374
Eric Smithb2c7af82008-04-30 02:12:09 +0000375/* Given a string that may have a decimal point in the current
376 locale, change it back to a dot. Since the string cannot get
377 longer, no need for a maximum buffer size parameter. */
378Py_LOCAL_INLINE(void)
379change_decimal_from_locale_to_dot(char* buffer)
380{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000381 struct lconv *locale_data = localeconv();
382 const char *decimal_point = locale_data->decimal_point;
Eric Smithb2c7af82008-04-30 02:12:09 +0000383
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000384 if (decimal_point[0] != '.' || decimal_point[1] != 0) {
385 size_t decimal_point_len = strlen(decimal_point);
Eric Smithb2c7af82008-04-30 02:12:09 +0000386
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000387 if (*buffer == '+' || *buffer == '-')
388 buffer++;
389 while (Py_ISDIGIT(*buffer))
390 buffer++;
391 if (strncmp(buffer, decimal_point, decimal_point_len) == 0) {
392 *buffer = '.';
393 buffer++;
394 if (decimal_point_len > 1) {
395 /* buffer needs to get smaller */
396 size_t rest_len = strlen(buffer +
397 (decimal_point_len - 1));
398 memmove(buffer,
399 buffer + (decimal_point_len - 1),
400 rest_len);
401 buffer[rest_len] = 0;
402 }
403 }
404 }
Eric Smithb2c7af82008-04-30 02:12:09 +0000405}
406
Martin v. Löwis737ea822004-06-08 18:52:54 +0000407
Christian Heimesc3f30c42008-02-22 16:37:40 +0000408/* From the C99 standard, section 7.19.6:
409The exponent always contains at least two digits, and only as many more digits
410as necessary to represent the exponent.
411*/
412#define MIN_EXPONENT_DIGITS 2
413
Eric Smithb2c7af82008-04-30 02:12:09 +0000414/* Ensure that any exponent, if present, is at least MIN_EXPONENT_DIGITS
415 in length. */
416Py_LOCAL_INLINE(void)
Mark Dickinsonce95e562009-04-26 20:02:24 +0000417ensure_minimum_exponent_length(char* buffer, size_t buf_size)
Eric Smithb2c7af82008-04-30 02:12:09 +0000418{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000419 char *p = strpbrk(buffer, "eE");
420 if (p && (*(p + 1) == '-' || *(p + 1) == '+')) {
421 char *start = p + 2;
422 int exponent_digit_cnt = 0;
423 int leading_zero_cnt = 0;
424 int in_leading_zeros = 1;
425 int significant_digit_cnt;
Eric Smithb2c7af82008-04-30 02:12:09 +0000426
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000427 /* Skip over the exponent and the sign. */
428 p += 2;
Eric Smithb2c7af82008-04-30 02:12:09 +0000429
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000430 /* Find the end of the exponent, keeping track of leading
431 zeros. */
432 while (*p && Py_ISDIGIT(*p)) {
433 if (in_leading_zeros && *p == '0')
434 ++leading_zero_cnt;
435 if (*p != '0')
436 in_leading_zeros = 0;
437 ++p;
438 ++exponent_digit_cnt;
439 }
Eric Smithb2c7af82008-04-30 02:12:09 +0000440
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000441 significant_digit_cnt = exponent_digit_cnt - leading_zero_cnt;
442 if (exponent_digit_cnt == MIN_EXPONENT_DIGITS) {
443 /* If there are 2 exactly digits, we're done,
444 regardless of what they contain */
445 }
446 else if (exponent_digit_cnt > MIN_EXPONENT_DIGITS) {
447 int extra_zeros_cnt;
Eric Smithb2c7af82008-04-30 02:12:09 +0000448
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000449 /* There are more than 2 digits in the exponent. See
450 if we can delete some of the leading zeros */
451 if (significant_digit_cnt < MIN_EXPONENT_DIGITS)
452 significant_digit_cnt = MIN_EXPONENT_DIGITS;
453 extra_zeros_cnt = exponent_digit_cnt -
454 significant_digit_cnt;
Eric Smithb2c7af82008-04-30 02:12:09 +0000455
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000456 /* Delete extra_zeros_cnt worth of characters from the
457 front of the exponent */
458 assert(extra_zeros_cnt >= 0);
Eric Smithb2c7af82008-04-30 02:12:09 +0000459
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000460 /* Add one to significant_digit_cnt to copy the
461 trailing 0 byte, thus setting the length */
462 memmove(start,
463 start + extra_zeros_cnt,
464 significant_digit_cnt + 1);
465 }
466 else {
467 /* If there are fewer than 2 digits, add zeros
468 until there are 2, if there's enough room */
469 int zeros = MIN_EXPONENT_DIGITS - exponent_digit_cnt;
470 if (start + zeros + exponent_digit_cnt + 1
471 < buffer + buf_size) {
472 memmove(start + zeros, start,
473 exponent_digit_cnt + 1);
474 memset(start, '0', zeros);
475 }
476 }
477 }
Eric Smithb2c7af82008-04-30 02:12:09 +0000478}
479
Mark Dickinsond3ca5572009-04-29 18:47:07 +0000480/* Remove trailing zeros after the decimal point from a numeric string; also
481 remove the decimal point if all digits following it are zero. The numeric
482 string must end in '\0', and should not have any leading or trailing
483 whitespace. Assumes that the decimal point is '.'. */
Eric Smithb2c7af82008-04-30 02:12:09 +0000484Py_LOCAL_INLINE(void)
Mark Dickinsond3ca5572009-04-29 18:47:07 +0000485remove_trailing_zeros(char *buffer)
Eric Smithb2c7af82008-04-30 02:12:09 +0000486{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000487 char *old_fraction_end, *new_fraction_end, *end, *p;
Mark Dickinsond3ca5572009-04-29 18:47:07 +0000488
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000489 p = buffer;
490 if (*p == '-' || *p == '+')
491 /* Skip leading sign, if present */
492 ++p;
493 while (Py_ISDIGIT(*p))
494 ++p;
Mark Dickinsond3ca5572009-04-29 18:47:07 +0000495
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000496 /* if there's no decimal point there's nothing to do */
497 if (*p++ != '.')
498 return;
Mark Dickinsond3ca5572009-04-29 18:47:07 +0000499
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000500 /* scan any digits after the point */
501 while (Py_ISDIGIT(*p))
502 ++p;
503 old_fraction_end = p;
Mark Dickinsond3ca5572009-04-29 18:47:07 +0000504
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000505 /* scan up to ending '\0' */
506 while (*p != '\0')
507 p++;
508 /* +1 to make sure that we move the null byte as well */
509 end = p+1;
Mark Dickinsond3ca5572009-04-29 18:47:07 +0000510
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000511 /* scan back from fraction_end, looking for removable zeros */
512 p = old_fraction_end;
513 while (*(p-1) == '0')
514 --p;
515 /* and remove point if we've got that far */
516 if (*(p-1) == '.')
517 --p;
518 new_fraction_end = p;
Mark Dickinsond3ca5572009-04-29 18:47:07 +0000519
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000520 memmove(new_fraction_end, old_fraction_end, end-old_fraction_end);
Mark Dickinsond3ca5572009-04-29 18:47:07 +0000521}
522
523/* Ensure that buffer has a decimal point in it. The decimal point will not
524 be in the current locale, it will always be '.'. Don't add a decimal point
525 if an exponent is present. Also, convert to exponential notation where
526 adding a '.0' would produce too many significant digits (see issue 5864).
527
528 Returns a pointer to the fixed buffer, or NULL on failure.
529*/
530Py_LOCAL_INLINE(char *)
531ensure_decimal_point(char* buffer, size_t buf_size, int precision)
532{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000533 int digit_count, insert_count = 0, convert_to_exp = 0;
534 char *chars_to_insert, *digits_start;
Eric Smithb2c7af82008-04-30 02:12:09 +0000535
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000536 /* search for the first non-digit character */
537 char *p = buffer;
538 if (*p == '-' || *p == '+')
539 /* Skip leading sign, if present. I think this could only
540 ever be '-', but it can't hurt to check for both. */
541 ++p;
542 digits_start = p;
543 while (*p && Py_ISDIGIT(*p))
544 ++p;
545 digit_count = Py_SAFE_DOWNCAST(p - digits_start, Py_ssize_t, int);
Eric Smithb2c7af82008-04-30 02:12:09 +0000546
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000547 if (*p == '.') {
548 if (Py_ISDIGIT(*(p+1))) {
549 /* Nothing to do, we already have a decimal
550 point and a digit after it */
551 }
552 else {
553 /* We have a decimal point, but no following
554 digit. Insert a zero after the decimal. */
555 /* can't ever get here via PyOS_double_to_string */
556 assert(precision == -1);
557 ++p;
558 chars_to_insert = "0";
559 insert_count = 1;
560 }
561 }
562 else if (!(*p == 'e' || *p == 'E')) {
563 /* Don't add ".0" if we have an exponent. */
564 if (digit_count == precision) {
565 /* issue 5864: don't add a trailing .0 in the case
566 where the '%g'-formatted result already has as many
567 significant digits as were requested. Switch to
568 exponential notation instead. */
569 convert_to_exp = 1;
570 /* no exponent, no point, and we shouldn't land here
571 for infs and nans, so we must be at the end of the
572 string. */
573 assert(*p == '\0');
574 }
575 else {
576 assert(precision == -1 || digit_count < precision);
577 chars_to_insert = ".0";
578 insert_count = 2;
579 }
580 }
581 if (insert_count) {
582 size_t buf_len = strlen(buffer);
583 if (buf_len + insert_count + 1 >= buf_size) {
584 /* If there is not enough room in the buffer
585 for the additional text, just skip it. It's
586 not worth generating an error over. */
587 }
588 else {
589 memmove(p + insert_count, p,
590 buffer + strlen(buffer) - p + 1);
591 memcpy(p, chars_to_insert, insert_count);
592 }
593 }
594 if (convert_to_exp) {
595 int written;
596 size_t buf_avail;
597 p = digits_start;
598 /* insert decimal point */
599 assert(digit_count >= 1);
600 memmove(p+2, p+1, digit_count); /* safe, but overwrites nul */
601 p[1] = '.';
602 p += digit_count+1;
603 assert(p <= buf_size+buffer);
604 buf_avail = buf_size+buffer-p;
605 if (buf_avail == 0)
606 return NULL;
607 /* Add exponent. It's okay to use lower case 'e': we only
608 arrive here as a result of using the empty format code or
609 repr/str builtins and those never want an upper case 'E' */
610 written = PyOS_snprintf(p, buf_avail, "e%+.02d", digit_count-1);
611 if (!(0 <= written &&
612 written < Py_SAFE_DOWNCAST(buf_avail, size_t, int)))
613 /* output truncated, or something else bad happened */
614 return NULL;
615 remove_trailing_zeros(buffer);
616 }
617 return buffer;
Eric Smithb2c7af82008-04-30 02:12:09 +0000618}
619
/* see FORMATBUFLEN in unicodeobject.c */
#define FLOAT_FORMATBUFLEN 120

/**
 * _PyOS_ascii_formatd:
 * @buffer: A buffer to place the resulting string in
 * @buf_size: The length of the buffer.
 * @format: The printf()-style format to use for the
 *          code to use for converting.  Must start with '%' and end with
 *          the conversion character.
 * @d: The double to convert
 * @precision: Only forwarded to ensure_decimal_point() for the 'Z' code;
 *             the precision actually used for formatting comes from @format.
 *
 * Converts a double to a string, using the '.' as
 * decimal point.  To format the number you pass in
 * a printf()-style format string.  Allowed conversion
 * specifiers are 'e', 'E', 'f', 'F', 'g', 'G', and 'Z'.
 *
 * 'Z' is the same as 'g', except it always has a decimal and
 * at least one digit after the decimal.
 *
 * Return value: The pointer to the buffer with the converted string.
 * On failure returns NULL but does not set any Python exception.
 **/
static char *
_PyOS_ascii_formatd(char       *buffer,
                    size_t      buf_size,
                    const char *format,
                    double      d,
                    int         precision)
{
    char format_char;
    size_t format_len;

    /* Issue 2264: code 'Z' requires copying the format.  'Z' is 'g', but
       also with at least one character past the decimal. */
    char tmp_format[FLOAT_FORMATBUFLEN];

    /* Validate the leading '%' before anything else.  This also rejects
       the empty string, so format_len below is at least 1 and
       format[format_len - 1] cannot index before the start of the string. */
    if (format[0] != '%')
        return NULL;

    /* The last character in the format string must be the format char */
    format_len = strlen(format);
    format_char = format[format_len - 1];

    /* Reject a single quote, a lowercase l, or a second percent anywhere
       after the first character. */
    if (strpbrk(format + 1, "'l%"))
        return NULL;

    /* Also curious about this function is that it accepts format strings
       like "%xg", which are invalid for floats.  In general, the
       interface to this function is not very good, but changing it is
       difficult because it's a public API. */

    if (!(format_char == 'e' || format_char == 'E' ||
          format_char == 'f' || format_char == 'F' ||
          format_char == 'g' || format_char == 'G' ||
          format_char == 'Z'))
        return NULL;

    /* Map 'Z' format_char to 'g', by copying the format string and
       replacing the final char with a 'g' */
    if (format_char == 'Z') {
        if (format_len + 1 >= sizeof(tmp_format)) {
            /* The format won't fit in our copy.  Error out.  In
               practice, this will never happen and will be
               detected by returning NULL */
            return NULL;
        }
        strcpy(tmp_format, format);
        tmp_format[format_len - 1] = 'g';
        format = tmp_format;
    }


    /* Have PyOS_snprintf do the hard work */
    PyOS_snprintf(buffer, buf_size, format, d);

    /* Do various fixups on the return string */

    /* Get the current locale, and find the decimal point string.
       Convert that string back to a dot. */
    change_decimal_from_locale_to_dot(buffer);

    /* If an exponent exists, ensure that the exponent is at least
       MIN_EXPONENT_DIGITS digits, providing the buffer is large enough
       for the extra zeros.  Also, if there are more than
       MIN_EXPONENT_DIGITS, remove as many zeros as possible until we get
       back to MIN_EXPONENT_DIGITS */
    ensure_minimum_exponent_length(buffer, buf_size);

    /* If format_char is 'Z', make sure we have at least one character
       after the decimal point (and make sure we have a decimal point);
       also switch to exponential notation in some edge cases where the
       extra character would produce more significant digits than we
       really want. */
    if (format_char == 'Z')
        buffer = ensure_decimal_point(buffer, buf_size, precision);

    return buffer;
}
722
Eric Smith0923d1d2009-04-16 20:16:10 +0000723/* The fallback code to use if _Py_dg_dtoa is not available. */
724
725PyAPI_FUNC(char *) PyOS_double_to_string(double val,
726 char format_code,
727 int precision,
728 int flags,
729 int *type)
Martin v. Löwis737ea822004-06-08 18:52:54 +0000730{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000731 char format[32];
732 Py_ssize_t bufsize;
733 char *buf;
734 int t, exp;
735 int upper = 0;
Eric Smith0923d1d2009-04-16 20:16:10 +0000736
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000737 /* Validate format_code, and map upper and lower case */
738 switch (format_code) {
739 case 'e': /* exponent */
740 case 'f': /* fixed */
741 case 'g': /* general */
742 break;
743 case 'E':
744 upper = 1;
745 format_code = 'e';
746 break;
747 case 'F':
748 upper = 1;
749 format_code = 'f';
750 break;
751 case 'G':
752 upper = 1;
753 format_code = 'g';
754 break;
755 case 'r': /* repr format */
756 /* Supplied precision is unused, must be 0. */
757 if (precision != 0) {
758 PyErr_BadInternalCall();
759 return NULL;
760 }
761 /* The repr() precision (17 significant decimal digits) is the
762 minimal number that is guaranteed to have enough precision
763 so that if the number is read back in the exact same binary
764 value is recreated. This is true for IEEE floating point
765 by design, and also happens to work for all other modern
766 hardware. */
767 precision = 17;
768 format_code = 'g';
769 break;
770 default:
771 PyErr_BadInternalCall();
772 return NULL;
773 }
Eric Smith0923d1d2009-04-16 20:16:10 +0000774
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000775 /* Here's a quick-and-dirty calculation to figure out how big a buffer
776 we need. In general, for a finite float we need:
Mark Dickinsonf489caf2009-05-01 11:42:00 +0000777
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000778 1 byte for each digit of the decimal significand, and
Mark Dickinsonf489caf2009-05-01 11:42:00 +0000779
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000780 1 for a possible sign
781 1 for a possible decimal point
782 2 for a possible [eE][+-]
783 1 for each digit of the exponent; if we allow 19 digits
784 total then we're safe up to exponents of 2**63.
785 1 for the trailing nul byte
Mark Dickinsonf489caf2009-05-01 11:42:00 +0000786
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000787 This gives a total of 24 + the number of digits in the significand,
788 and the number of digits in the significand is:
Mark Dickinsonf489caf2009-05-01 11:42:00 +0000789
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000790 for 'g' format: at most precision, except possibly
791 when precision == 0, when it's 1.
792 for 'e' format: precision+1
793 for 'f' format: precision digits after the point, at least 1
794 before. To figure out how many digits appear before the point
795 we have to examine the size of the number. If fabs(val) < 1.0
796 then there will be only one digit before the point. If
797 fabs(val) >= 1.0, then there are at most
Mark Dickinsonf489caf2009-05-01 11:42:00 +0000798
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000799 1+floor(log10(ceiling(fabs(val))))
Mark Dickinsonf489caf2009-05-01 11:42:00 +0000800
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000801 digits before the point (where the 'ceiling' allows for the
802 possibility that the rounding rounds the integer part of val
803 up). A safe upper bound for the above quantity is
804 1+floor(exp/3), where exp is the unique integer such that 0.5
805 <= fabs(val)/2**exp < 1.0. This exp can be obtained from
806 frexp.
Mark Dickinsonf489caf2009-05-01 11:42:00 +0000807
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000808 So we allow room for precision+1 digits for all formats, plus an
809 extra floor(exp/3) digits for 'f' format.
Mark Dickinsonf489caf2009-05-01 11:42:00 +0000810
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000811 */
Mark Dickinsonf489caf2009-05-01 11:42:00 +0000812
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000813 if (Py_IS_NAN(val) || Py_IS_INFINITY(val))
814 /* 3 for 'inf'/'nan', 1 for sign, 1 for '\0' */
815 bufsize = 5;
816 else {
817 bufsize = 25 + precision;
818 if (format_code == 'f' && fabs(val) >= 1.0) {
819 frexp(val, &exp);
820 bufsize += exp/3;
821 }
822 }
Mark Dickinsonf489caf2009-05-01 11:42:00 +0000823
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000824 buf = PyMem_Malloc(bufsize);
825 if (buf == NULL) {
826 PyErr_NoMemory();
827 return NULL;
828 }
Mark Dickinsonf489caf2009-05-01 11:42:00 +0000829
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000830 /* Handle nan and inf. */
831 if (Py_IS_NAN(val)) {
832 strcpy(buf, "nan");
833 t = Py_DTST_NAN;
834 } else if (Py_IS_INFINITY(val)) {
835 if (copysign(1., val) == 1.)
836 strcpy(buf, "inf");
837 else
838 strcpy(buf, "-inf");
839 t = Py_DTST_INFINITE;
840 } else {
841 t = Py_DTST_FINITE;
842 if (flags & Py_DTSF_ADD_DOT_0)
843 format_code = 'Z';
Eric Smith0923d1d2009-04-16 20:16:10 +0000844
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000845 PyOS_snprintf(format, sizeof(format), "%%%s.%i%c",
846 (flags & Py_DTSF_ALT ? "#" : ""), precision,
847 format_code);
848 _PyOS_ascii_formatd(buf, bufsize, format, val, precision);
849 }
Eric Smith0923d1d2009-04-16 20:16:10 +0000850
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000851 /* Add sign when requested. It's convenient (esp. when formatting
852 complex numbers) to include a sign even for inf and nan. */
853 if (flags & Py_DTSF_SIGN && buf[0] != '-') {
854 size_t len = strlen(buf);
855 /* the bufsize calculations above should ensure that we've got
856 space to add a sign */
857 assert((size_t)bufsize >= len+2);
858 memmove(buf+1, buf, len+1);
859 buf[0] = '+';
860 }
861 if (upper) {
862 /* Convert to upper case. */
863 char *p1;
864 for (p1 = buf; *p1; p1++)
865 *p1 = Py_TOUPPER(*p1);
866 }
Eric Smith0923d1d2009-04-16 20:16:10 +0000867
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000868 if (type)
869 *type = t;
870 return buf;
Martin v. Löwis737ea822004-06-08 18:52:54 +0000871}
Eric Smith0923d1d2009-04-16 20:16:10 +0000872
873#else
874
875/* _Py_dg_dtoa is available. */
876
/* Output strings are picked from a table by index, so producing upper-case
   output needs no locale-independent upper-casing routine: callers simply
   select the other table. */
#define OFS_INF 0
#define OFS_NAN 1
#define OFS_E 2

/* Both tables are indexed by the OFS_* constants above.  The formatting
   code copies a fixed number of bytes out of each entry (3 for inf/nan,
   1 for the exponent marker), so the lengths of the strings must not be
   changed. */
static char *lc_float_strings[] = { "inf", "nan", "e" };
static char *uc_float_strings[] = { "INF", "NAN", "E" };
894
895
896/* Convert a double d to a string, and return a PyMem_Malloc'd block of
897 memory contain the resulting string.
898
899 Arguments:
900 d is the double to be converted
Eric Smith63376222009-05-05 14:04:18 +0000901 format_code is one of 'e', 'f', 'g', 'r'. 'e', 'f' and 'g'
902 correspond to '%e', '%f' and '%g'; 'r' corresponds to repr.
Eric Smith0923d1d2009-04-16 20:16:10 +0000903 mode is one of '0', '2' or '3', and is completely determined by
Eric Smith63376222009-05-05 14:04:18 +0000904 format_code: 'e' and 'g' use mode 2; 'f' mode 3, 'r' mode 0.
Eric Smith0923d1d2009-04-16 20:16:10 +0000905 precision is the desired precision
906 always_add_sign is nonzero if a '+' sign should be included for positive
907 numbers
908 add_dot_0_if_integer is nonzero if integers in non-exponential form
Eric Smith63376222009-05-05 14:04:18 +0000909 should have ".0" added. Only applies to format codes 'r' and 'g'.
Eric Smith0923d1d2009-04-16 20:16:10 +0000910 use_alt_formatting is nonzero if alternative formatting should be
Eric Smith63376222009-05-05 14:04:18 +0000911 used. Only applies to format codes 'e', 'f' and 'g'. For code 'g',
912 at most one of use_alt_formatting and add_dot_0_if_integer should
913 be nonzero.
Eric Smith0923d1d2009-04-16 20:16:10 +0000914 type, if non-NULL, will be set to one of these constants to identify
915 the type of the 'd' argument:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000916 Py_DTST_FINITE
917 Py_DTST_INFINITE
918 Py_DTST_NAN
Eric Smith0923d1d2009-04-16 20:16:10 +0000919
920 Returns a PyMem_Malloc'd block of memory containing the resulting string,
921 or NULL on error. If NULL is returned, the Python error has been set.
922 */
923
924static char *
925format_float_short(double d, char format_code,
Victor Stinner7b251352013-06-24 23:37:40 +0200926 int mode, int precision,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000927 int always_add_sign, int add_dot_0_if_integer,
928 int use_alt_formatting, char **float_strings, int *type)
Eric Smith0923d1d2009-04-16 20:16:10 +0000929{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000930 char *buf = NULL;
931 char *p = NULL;
932 Py_ssize_t bufsize = 0;
933 char *digits, *digits_end;
934 int decpt_as_int, sign, exp_len, exp = 0, use_exp = 0;
935 Py_ssize_t decpt, digits_len, vdigits_start, vdigits_end;
936 _Py_SET_53BIT_PRECISION_HEADER;
Eric Smith0923d1d2009-04-16 20:16:10 +0000937
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000938 /* _Py_dg_dtoa returns a digit string (no decimal point or exponent).
939 Must be matched by a call to _Py_dg_freedtoa. */
940 _Py_SET_53BIT_PRECISION_START;
941 digits = _Py_dg_dtoa(d, mode, precision, &decpt_as_int, &sign,
942 &digits_end);
943 _Py_SET_53BIT_PRECISION_END;
Eric Smith0923d1d2009-04-16 20:16:10 +0000944
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000945 decpt = (Py_ssize_t)decpt_as_int;
946 if (digits == NULL) {
947 /* The only failure mode is no memory. */
948 PyErr_NoMemory();
949 goto exit;
950 }
951 assert(digits_end != NULL && digits_end >= digits);
952 digits_len = digits_end - digits;
Eric Smith0923d1d2009-04-16 20:16:10 +0000953
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000954 if (digits_len && !Py_ISDIGIT(digits[0])) {
955 /* Infinities and nans here; adapt Gay's output,
956 so convert Infinity to inf and NaN to nan, and
957 ignore sign of nan. Then return. */
Eric Smith0923d1d2009-04-16 20:16:10 +0000958
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000959 /* ignore the actual sign of a nan */
960 if (digits[0] == 'n' || digits[0] == 'N')
961 sign = 0;
Mark Dickinsonad476da2009-04-23 19:14:16 +0000962
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000963 /* We only need 5 bytes to hold the result "+inf\0" . */
964 bufsize = 5; /* Used later in an assert. */
965 buf = (char *)PyMem_Malloc(bufsize);
966 if (buf == NULL) {
967 PyErr_NoMemory();
968 goto exit;
969 }
970 p = buf;
Eric Smith0923d1d2009-04-16 20:16:10 +0000971
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000972 if (sign == 1) {
973 *p++ = '-';
974 }
975 else if (always_add_sign) {
976 *p++ = '+';
977 }
978 if (digits[0] == 'i' || digits[0] == 'I') {
979 strncpy(p, float_strings[OFS_INF], 3);
980 p += 3;
Eric Smith0923d1d2009-04-16 20:16:10 +0000981
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000982 if (type)
983 *type = Py_DTST_INFINITE;
984 }
985 else if (digits[0] == 'n' || digits[0] == 'N') {
986 strncpy(p, float_strings[OFS_NAN], 3);
987 p += 3;
Eric Smith0923d1d2009-04-16 20:16:10 +0000988
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000989 if (type)
990 *type = Py_DTST_NAN;
991 }
992 else {
993 /* shouldn't get here: Gay's code should always return
994 something starting with a digit, an 'I', or 'N' */
995 strncpy(p, "ERR", 3);
Brett Cannonb94767f2011-02-22 20:15:44 +0000996 /* p += 3; */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000997 assert(0);
998 }
999 goto exit;
1000 }
Eric Smith0923d1d2009-04-16 20:16:10 +00001001
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001002 /* The result must be finite (not inf or nan). */
1003 if (type)
1004 *type = Py_DTST_FINITE;
Eric Smith0923d1d2009-04-16 20:16:10 +00001005
1006
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001007 /* We got digits back, format them. We may need to pad 'digits'
1008 either on the left or right (or both) with extra zeros, so in
1009 general the resulting string has the form
Eric Smith0923d1d2009-04-16 20:16:10 +00001010
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001011 [<sign>]<zeros><digits><zeros>[<exponent>]
Eric Smith0923d1d2009-04-16 20:16:10 +00001012
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001013 where either of the <zeros> pieces could be empty, and there's a
1014 decimal point that could appear either in <digits> or in the
1015 leading or trailing <zeros>.
Eric Smith0923d1d2009-04-16 20:16:10 +00001016
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001017 Imagine an infinite 'virtual' string vdigits, consisting of the
1018 string 'digits' (starting at index 0) padded on both the left and
1019 right with infinite strings of zeros. We want to output a slice
Eric Smith0923d1d2009-04-16 20:16:10 +00001020
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001021 vdigits[vdigits_start : vdigits_end]
Eric Smith0923d1d2009-04-16 20:16:10 +00001022
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001023 of this virtual string. Thus if vdigits_start < 0 then we'll end
1024 up producing some leading zeros; if vdigits_end > digits_len there
1025 will be trailing zeros in the output. The next section of code
1026 determines whether to use an exponent or not, figures out the
1027 position 'decpt' of the decimal point, and computes 'vdigits_start'
1028 and 'vdigits_end'. */
1029 vdigits_end = digits_len;
1030 switch (format_code) {
1031 case 'e':
1032 use_exp = 1;
1033 vdigits_end = precision;
1034 break;
1035 case 'f':
1036 vdigits_end = decpt + precision;
1037 break;
1038 case 'g':
1039 if (decpt <= -4 || decpt >
1040 (add_dot_0_if_integer ? precision-1 : precision))
1041 use_exp = 1;
1042 if (use_alt_formatting)
1043 vdigits_end = precision;
1044 break;
1045 case 'r':
1046 /* convert to exponential format at 1e16. We used to convert
1047 at 1e17, but that gives odd-looking results for some values
1048 when a 16-digit 'shortest' repr is padded with bogus zeros.
1049 For example, repr(2e16+8) would give 20000000000000010.0;
1050 the true value is 20000000000000008.0. */
1051 if (decpt <= -4 || decpt > 16)
1052 use_exp = 1;
1053 break;
1054 default:
1055 PyErr_BadInternalCall();
1056 goto exit;
1057 }
Eric Smith0923d1d2009-04-16 20:16:10 +00001058
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001059 /* if using an exponent, reset decimal point position to 1 and adjust
1060 exponent accordingly.*/
1061 if (use_exp) {
Victor Stinner7b251352013-06-24 23:37:40 +02001062 exp = (int)decpt - 1;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001063 decpt = 1;
1064 }
1065 /* ensure vdigits_start < decpt <= vdigits_end, or vdigits_start <
1066 decpt < vdigits_end if add_dot_0_if_integer and no exponent */
1067 vdigits_start = decpt <= 0 ? decpt-1 : 0;
1068 if (!use_exp && add_dot_0_if_integer)
1069 vdigits_end = vdigits_end > decpt ? vdigits_end : decpt + 1;
1070 else
1071 vdigits_end = vdigits_end > decpt ? vdigits_end : decpt;
Eric Smith0923d1d2009-04-16 20:16:10 +00001072
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001073 /* double check inequalities */
1074 assert(vdigits_start <= 0 &&
1075 0 <= digits_len &&
1076 digits_len <= vdigits_end);
1077 /* decimal point should be in (vdigits_start, vdigits_end] */
1078 assert(vdigits_start < decpt && decpt <= vdigits_end);
Eric Smith0923d1d2009-04-16 20:16:10 +00001079
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001080 /* Compute an upper bound how much memory we need. This might be a few
1081 chars too long, but no big deal. */
1082 bufsize =
1083 /* sign, decimal point and trailing 0 byte */
1084 3 +
Eric Smith0923d1d2009-04-16 20:16:10 +00001085
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001086 /* total digit count (including zero padding on both sides) */
1087 (vdigits_end - vdigits_start) +
Eric Smith0923d1d2009-04-16 20:16:10 +00001088
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001089 /* exponent "e+100", max 3 numerical digits */
1090 (use_exp ? 5 : 0);
Eric Smith0923d1d2009-04-16 20:16:10 +00001091
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001092 /* Now allocate the memory and initialize p to point to the start of
1093 it. */
1094 buf = (char *)PyMem_Malloc(bufsize);
1095 if (buf == NULL) {
1096 PyErr_NoMemory();
1097 goto exit;
1098 }
1099 p = buf;
Eric Smith0923d1d2009-04-16 20:16:10 +00001100
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001101 /* Add a negative sign if negative, and a plus sign if non-negative
1102 and always_add_sign is true. */
1103 if (sign == 1)
1104 *p++ = '-';
1105 else if (always_add_sign)
1106 *p++ = '+';
Eric Smith0923d1d2009-04-16 20:16:10 +00001107
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001108 /* note that exactly one of the three 'if' conditions is true,
1109 so we include exactly one decimal point */
1110 /* Zero padding on left of digit string */
1111 if (decpt <= 0) {
1112 memset(p, '0', decpt-vdigits_start);
1113 p += decpt - vdigits_start;
1114 *p++ = '.';
1115 memset(p, '0', 0-decpt);
1116 p += 0-decpt;
1117 }
1118 else {
1119 memset(p, '0', 0-vdigits_start);
1120 p += 0 - vdigits_start;
1121 }
Eric Smith0923d1d2009-04-16 20:16:10 +00001122
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001123 /* Digits, with included decimal point */
1124 if (0 < decpt && decpt <= digits_len) {
1125 strncpy(p, digits, decpt-0);
1126 p += decpt-0;
1127 *p++ = '.';
1128 strncpy(p, digits+decpt, digits_len-decpt);
1129 p += digits_len-decpt;
1130 }
1131 else {
1132 strncpy(p, digits, digits_len);
1133 p += digits_len;
1134 }
Eric Smith0923d1d2009-04-16 20:16:10 +00001135
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001136 /* And zeros on the right */
1137 if (digits_len < decpt) {
1138 memset(p, '0', decpt-digits_len);
1139 p += decpt-digits_len;
1140 *p++ = '.';
1141 memset(p, '0', vdigits_end-decpt);
1142 p += vdigits_end-decpt;
1143 }
1144 else {
1145 memset(p, '0', vdigits_end-digits_len);
1146 p += vdigits_end-digits_len;
1147 }
Eric Smith0923d1d2009-04-16 20:16:10 +00001148
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001149 /* Delete a trailing decimal pt unless using alternative formatting. */
1150 if (p[-1] == '.' && !use_alt_formatting)
1151 p--;
Eric Smith0923d1d2009-04-16 20:16:10 +00001152
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001153 /* Now that we've done zero padding, add an exponent if needed. */
1154 if (use_exp) {
1155 *p++ = float_strings[OFS_E][0];
1156 exp_len = sprintf(p, "%+.02d", exp);
1157 p += exp_len;
1158 }
Eric Smith0923d1d2009-04-16 20:16:10 +00001159 exit:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001160 if (buf) {
1161 *p = '\0';
1162 /* It's too late if this fails, as we've already stepped on
1163 memory that isn't ours. But it's an okay debugging test. */
1164 assert(p-buf < bufsize);
1165 }
1166 if (digits)
1167 _Py_dg_freedtoa(digits);
Eric Smith0923d1d2009-04-16 20:16:10 +00001168
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001169 return buf;
Eric Smith0923d1d2009-04-16 20:16:10 +00001170}
1171
1172
1173PyAPI_FUNC(char *) PyOS_double_to_string(double val,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001174 char format_code,
1175 int precision,
1176 int flags,
1177 int *type)
Eric Smith0923d1d2009-04-16 20:16:10 +00001178{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001179 char **float_strings = lc_float_strings;
1180 int mode;
Eric Smith0923d1d2009-04-16 20:16:10 +00001181
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001182 /* Validate format_code, and map upper and lower case. Compute the
1183 mode and make any adjustments as needed. */
1184 switch (format_code) {
1185 /* exponent */
1186 case 'E':
1187 float_strings = uc_float_strings;
1188 format_code = 'e';
1189 /* Fall through. */
1190 case 'e':
1191 mode = 2;
1192 precision++;
1193 break;
Eric Smith193125a2009-04-16 22:08:31 +00001194
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001195 /* fixed */
1196 case 'F':
1197 float_strings = uc_float_strings;
1198 format_code = 'f';
1199 /* Fall through. */
1200 case 'f':
1201 mode = 3;
1202 break;
Eric Smith193125a2009-04-16 22:08:31 +00001203
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001204 /* general */
1205 case 'G':
1206 float_strings = uc_float_strings;
1207 format_code = 'g';
1208 /* Fall through. */
1209 case 'g':
1210 mode = 2;
1211 /* precision 0 makes no sense for 'g' format; interpret as 1 */
1212 if (precision == 0)
1213 precision = 1;
1214 break;
Eric Smith193125a2009-04-16 22:08:31 +00001215
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001216 /* repr format */
1217 case 'r':
1218 mode = 0;
1219 /* Supplied precision is unused, must be 0. */
1220 if (precision != 0) {
1221 PyErr_BadInternalCall();
1222 return NULL;
1223 }
1224 break;
Eric Smith193125a2009-04-16 22:08:31 +00001225
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001226 default:
1227 PyErr_BadInternalCall();
1228 return NULL;
1229 }
Eric Smith0923d1d2009-04-16 20:16:10 +00001230
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001231 return format_float_short(val, format_code, mode, precision,
1232 flags & Py_DTSF_SIGN,
1233 flags & Py_DTSF_ADD_DOT_0,
1234 flags & Py_DTSF_ALT,
1235 float_strings, type);
Eric Smith0923d1d2009-04-16 20:16:10 +00001236}
1237#endif /* ifdef PY_NO_SHORT_FLOAT_REPR */