blob: 859af435f9790d97a0826425061d4ddd3d39960c [file] [log] [blame]
Martin v. Löwis737ea822004-06-08 18:52:54 +00001/* -*- Mode: C; c-file-style: "python" -*- */
2
3#include <Python.h>
4#include <locale.h>
5
/* Prefix match used when detecting "nan" and "inf": checks whether s begins
   with the pattern t, ignoring the case of s.  The pattern t itself must
   already be lower-case.  Returns 1 when all of t was matched, 0 otherwise. */

static int
case_insensitive_match(const char *s, const char *t)
{
    for (; *t != '\0'; s++, t++) {
        /* Only s is folded to lower case; t is compared as given. */
        if (Py_TOLOWER(*s) != *t)
            return 0;
    }
    return 1;
}
18
19/* _Py_parse_inf_or_nan: Attempt to parse a string of the form "nan", "inf" or
20 "infinity", with an optional leading sign of "+" or "-". On success,
21 return the NaN or Infinity as a double and set *endptr to point just beyond
22 the successfully parsed portion of the string. On failure, return -1.0 and
23 set *endptr to point to the start of the string. */
24
25double
26_Py_parse_inf_or_nan(const char *p, char **endptr)
27{
Antoine Pitrouc83ea132010-05-09 14:46:46 +000028 double retval;
29 const char *s;
30 int negate = 0;
Mark Dickinson975d7572009-10-26 15:39:50 +000031
Antoine Pitrouc83ea132010-05-09 14:46:46 +000032 s = p;
33 if (*s == '-') {
34 negate = 1;
35 s++;
36 }
37 else if (*s == '+') {
38 s++;
39 }
40 if (case_insensitive_match(s, "inf")) {
41 s += 3;
42 if (case_insensitive_match(s, "inity"))
43 s += 5;
44 retval = negate ? -Py_HUGE_VAL : Py_HUGE_VAL;
45 }
Mark Dickinson975d7572009-10-26 15:39:50 +000046#ifdef Py_NAN
Antoine Pitrouc83ea132010-05-09 14:46:46 +000047 else if (case_insensitive_match(s, "nan")) {
48 s += 3;
49 retval = negate ? -Py_NAN : Py_NAN;
50 }
Mark Dickinson975d7572009-10-26 15:39:50 +000051#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +000052 else {
53 s = p;
54 retval = -1.0;
55 }
56 *endptr = (char *)s;
57 return retval;
Mark Dickinson975d7572009-10-26 15:39:50 +000058}
59
Martin v. Löwis737ea822004-06-08 18:52:54 +000060/**
61 * PyOS_ascii_strtod:
62 * @nptr: the string to convert to a numeric value.
63 * @endptr: if non-%NULL, it returns the character after
64 * the last character used in the conversion.
Antoine Pitrouc83ea132010-05-09 14:46:46 +000065 *
Martin v. Löwis737ea822004-06-08 18:52:54 +000066 * Converts a string to a #gdouble value.
67 * This function behaves like the standard strtod() function
68 * does in the C locale. It does this without actually
69 * changing the current locale, since that would not be
70 * thread-safe.
71 *
72 * This function is typically used when reading configuration
73 * files or other non-user input that should be locale independent.
74 * To handle input from the user you should normally use the
75 * locale-sensitive system strtod() function.
76 *
77 * If the correct value would cause overflow, plus or minus %HUGE_VAL
78 * is returned (according to the sign of the value), and %ERANGE is
79 * stored in %errno. If the correct value would cause underflow,
80 * zero is returned and %ERANGE is stored in %errno.
Georg Brandlb569ee42006-05-29 14:28:05 +000081 * If memory allocation fails, %ENOMEM is stored in %errno.
Antoine Pitrouc83ea132010-05-09 14:46:46 +000082 *
Martin v. Löwis737ea822004-06-08 18:52:54 +000083 * This function resets %errno before calling strtod() so that
84 * you can reliably detect overflow and underflow.
85 *
86 * Return value: the #gdouble value.
87 **/
Eric Smithaca19e62009-04-22 13:29:05 +000088
Mark Dickinson975d7572009-10-26 15:39:50 +000089#ifndef PY_NO_SHORT_FLOAT_REPR
90
91double
92_PyOS_ascii_strtod(const char *nptr, char **endptr)
93{
Antoine Pitrouc83ea132010-05-09 14:46:46 +000094 double result;
95 _Py_SET_53BIT_PRECISION_HEADER;
Mark Dickinson975d7572009-10-26 15:39:50 +000096
Antoine Pitrouc83ea132010-05-09 14:46:46 +000097 assert(nptr != NULL);
98 /* Set errno to zero, so that we can distinguish zero results
99 and underflows */
100 errno = 0;
Mark Dickinson975d7572009-10-26 15:39:50 +0000101
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000102 _Py_SET_53BIT_PRECISION_START;
103 result = _Py_dg_strtod(nptr, endptr);
104 _Py_SET_53BIT_PRECISION_END;
Mark Dickinson975d7572009-10-26 15:39:50 +0000105
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000106 if (*endptr == nptr)
107 /* string might represent an inf or nan */
108 result = _Py_parse_inf_or_nan(nptr, endptr);
Mark Dickinson975d7572009-10-26 15:39:50 +0000109
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000110 return result;
Mark Dickinson975d7572009-10-26 15:39:50 +0000111
112}
113
114#else
115
/*
   Fallback used when _Py_dg_strtod is not available: parse with the system
   strtod.  Since strtod is locale aware, a '.' in the input may first have
   to be replaced by the current locale's decimal point.

   Note that unlike _Py_dg_strtod, the system strtod may not always give
   correctly rounded results.

   nptr and endptr must both be non-NULL.  On success the parsed value is
   returned and *endptr points just past the text that was consumed.  If the
   input does not start with a valid number, *endptr is set to nptr, errno
   is set to EINVAL and -1.0 is returned.  If the temporary copy of the
   input cannot be allocated, *endptr is set to nptr, errno is set to ENOMEM
   and -1.0 is returned.
*/

double
_PyOS_ascii_strtod(const char *nptr, char **endptr)
{
    char *fail_pos;
    double val = -1.0;
    struct lconv *locale_data;
    const char *decimal_point;
    size_t decimal_point_len;
    const char *p, *decimal_point_pos;
    const char *end = NULL; /* Silence gcc */
    const char *digits_pos = NULL;
    int negate = 0;

    assert(nptr != NULL);

    fail_pos = NULL;

    locale_data = localeconv();
    decimal_point = locale_data->decimal_point;
    decimal_point_len = strlen(decimal_point);

    assert(decimal_point_len != 0);

    decimal_point_pos = NULL;

    /* Parse infinities and nans */
    val = _Py_parse_inf_or_nan(nptr, endptr);
    if (*endptr != nptr)
        return val;

    /* Set errno to zero, so that we can distinguish zero results
       and underflows */
    errno = 0;

    /* We process the optional sign manually, then pass the remainder to
       the system strtod.  This ensures that the result of an underflow
       has the correct sign. (bug #1725) */
    p = nptr;
    /* Process leading sign, if present */
    if (*p == '-') {
        negate = 1;
        p++;
    }
    else if (*p == '+') {
        p++;
    }

    /* Some platform strtods accept hex floats; Python shouldn't (at the
       moment), so we check explicitly for strings starting with '0x'. */
    if (*p == '0' && (*(p+1) == 'x' || *(p+1) == 'X'))
        goto invalid_string;

    /* Check that what's left begins with a digit or decimal point */
    if (!Py_ISDIGIT(*p) && *p != '.')
        goto invalid_string;

    digits_pos = p;
    if (decimal_point[0] != '.' ||
        decimal_point[1] != 0)
    {
        /* Look for a '.' in the input; if present, it'll need to be
           swapped for the current locale's decimal point before we
           call strtod.  On the other hand, if we find the current
           locale's decimal point then the input is invalid. */
        while (Py_ISDIGIT(*p))
            p++;

        if (*p == '.')
        {
            decimal_point_pos = p++;

            /* locate end of number */
            while (Py_ISDIGIT(*p))
                p++;

            if (*p == 'e' || *p == 'E')
                p++;
            if (*p == '+' || *p == '-')
                p++;
            while (Py_ISDIGIT(*p))
                p++;
            end = p;
        }
        else if (strncmp(p, decimal_point, decimal_point_len) == 0)
            /* Python bug #1417699 */
            goto invalid_string;
        /* For the other cases, we need not convert the decimal
           point */
    }

    if (decimal_point_pos) {
        char *copy, *c;
        /* Create a copy of the input, with the '.' converted to the
           locale-specific decimal point */
        copy = (char *)PyMem_MALLOC(end - digits_pos +
                                    1 + decimal_point_len);
        if (copy == NULL) {
            *endptr = (char *)nptr;
            errno = ENOMEM;
            return val;
        }

        c = copy;
        memcpy(c, digits_pos, decimal_point_pos - digits_pos);
        c += decimal_point_pos - digits_pos;
        memcpy(c, decimal_point, decimal_point_len);
        c += decimal_point_len;
        memcpy(c, decimal_point_pos + 1,
               end - (decimal_point_pos + 1));
        c += end - (decimal_point_pos + 1);
        *c = 0;

        val = strtod(copy, &fail_pos);

        if (fail_pos)
        {
            /* fail_pos points into copy, whereas decimal_point_pos points
               into the caller's string.  Compare offsets from the start
               of each buffer; comparing the raw pointers would relate two
               unrelated objects and give an arbitrary answer. */
            if (fail_pos - copy > decimal_point_pos - digits_pos)
                /* strtod went past the decimal point: undo the extra
                   width of the locale's separator. */
                fail_pos = (char *)digits_pos +
                    (fail_pos - copy) -
                    (decimal_point_len - 1);
            else
                fail_pos = (char *)digits_pos +
                    (fail_pos - copy);
        }

        PyMem_FREE(copy);

    }
    else {
        val = strtod(digits_pos, &fail_pos);
    }

    if (fail_pos == digits_pos)
        goto invalid_string;

    if (negate && fail_pos != nptr)
        val = -val;
    *endptr = fail_pos;

    return val;

  invalid_string:
    *endptr = (char*)nptr;
    errno = EINVAL;
    return -1.0;
}
270
Mark Dickinson975d7572009-10-26 15:39:50 +0000271#endif
272
Mark Dickinson09823a22009-10-31 09:42:39 +0000273/* PyOS_ascii_strtod is DEPRECATED in Python 2.7 and 3.1 */
274
Mark Dickinson975d7572009-10-26 15:39:50 +0000275double
276PyOS_ascii_strtod(const char *nptr, char **endptr)
277{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000278 char *fail_pos;
279 const char *p;
280 double x;
Mark Dickinson975d7572009-10-26 15:39:50 +0000281
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000282 if (PyErr_WarnEx(PyExc_DeprecationWarning,
283 "PyOS_ascii_strtod and PyOS_ascii_atof are "
284 "deprecated. Use PyOS_string_to_double "
285 "instead.", 1) < 0)
286 return -1.0;
Mark Dickinson09823a22009-10-31 09:42:39 +0000287
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000288 /* _PyOS_ascii_strtod already does everything that we want,
289 except that it doesn't parse leading whitespace */
290 p = nptr;
291 while (Py_ISSPACE(*p))
292 p++;
293 x = _PyOS_ascii_strtod(p, &fail_pos);
294 if (fail_pos == p)
295 fail_pos = (char *)nptr;
296 if (endptr)
297 *endptr = (char *)fail_pos;
298 return x;
Mark Dickinson975d7572009-10-26 15:39:50 +0000299}
300
Mark Dickinson09823a22009-10-31 09:42:39 +0000301/* PyOS_ascii_strtod is DEPRECATED in Python 2.7 and 3.1 */
302
Eric Smithaca19e62009-04-22 13:29:05 +0000303double
304PyOS_ascii_atof(const char *nptr)
305{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000306 return PyOS_ascii_strtod(nptr, NULL);
Eric Smithaca19e62009-04-22 13:29:05 +0000307}
308
Mark Dickinson09823a22009-10-31 09:42:39 +0000309/* PyOS_string_to_double is the recommended replacement for the deprecated
Mark Dickinson975d7572009-10-26 15:39:50 +0000310 PyOS_ascii_strtod and PyOS_ascii_atof functions. It converts a
311 null-terminated byte string s (interpreted as a string of ASCII characters)
312 to a float. The string should not have leading or trailing whitespace (in
313 contrast, PyOS_ascii_strtod allows leading whitespace but not trailing
314 whitespace). The conversion is independent of the current locale.
315
316 If endptr is NULL, try to convert the whole string. Raise ValueError and
317 return -1.0 if the string is not a valid representation of a floating-point
318 number.
319
320 If endptr is non-NULL, try to convert as much of the string as possible.
321 If no initial segment of the string is the valid representation of a
322 floating-point number then *endptr is set to point to the beginning of the
323 string, -1.0 is returned and again ValueError is raised.
324
325 On overflow (e.g., when trying to convert '1e500' on an IEEE 754 machine),
326 if overflow_exception is NULL then +-Py_HUGE_VAL is returned, and no Python
327 exception is raised. Otherwise, overflow_exception should point to a
328 a Python exception, this exception will be raised, -1.0 will be returned,
329 and *endptr will point just past the end of the converted value.
330
331 If any other failure occurs (for example lack of memory), -1.0 is returned
332 and the appropriate Python exception will have been set.
333*/
334
335double
336PyOS_string_to_double(const char *s,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000337 char **endptr,
338 PyObject *overflow_exception)
Mark Dickinson975d7572009-10-26 15:39:50 +0000339{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000340 double x, result=-1.0;
341 char *fail_pos;
Mark Dickinson975d7572009-10-26 15:39:50 +0000342
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000343 errno = 0;
344 PyFPE_START_PROTECT("PyOS_string_to_double", return -1.0)
345 x = _PyOS_ascii_strtod(s, &fail_pos);
346 PyFPE_END_PROTECT(x)
Mark Dickinson975d7572009-10-26 15:39:50 +0000347
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000348 if (errno == ENOMEM) {
349 PyErr_NoMemory();
350 fail_pos = (char *)s;
351 }
352 else if (!endptr && (fail_pos == s || *fail_pos != '\0'))
353 PyErr_Format(PyExc_ValueError,
354 "could not convert string to float: "
355 "%.200s", s);
356 else if (fail_pos == s)
357 PyErr_Format(PyExc_ValueError,
358 "could not convert string to float: "
359 "%.200s", s);
360 else if (errno == ERANGE && fabs(x) >= 1.0 && overflow_exception)
361 PyErr_Format(overflow_exception,
362 "value too large to convert to float: "
363 "%.200s", s);
364 else
365 result = x;
Mark Dickinson975d7572009-10-26 15:39:50 +0000366
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000367 if (endptr != NULL)
368 *endptr = fail_pos;
369 return result;
Mark Dickinson975d7572009-10-26 15:39:50 +0000370}
Eric Smithaca19e62009-04-22 13:29:05 +0000371
Eric Smith0a950632008-04-30 01:09:30 +0000372/* Given a string that may have a decimal point in the current
373 locale, change it back to a dot. Since the string cannot get
374 longer, no need for a maximum buffer size parameter. */
375Py_LOCAL_INLINE(void)
376change_decimal_from_locale_to_dot(char* buffer)
377{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000378 struct lconv *locale_data = localeconv();
379 const char *decimal_point = locale_data->decimal_point;
Eric Smith0a950632008-04-30 01:09:30 +0000380
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000381 if (decimal_point[0] != '.' || decimal_point[1] != 0) {
382 size_t decimal_point_len = strlen(decimal_point);
Eric Smith0a950632008-04-30 01:09:30 +0000383
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000384 if (*buffer == '+' || *buffer == '-')
385 buffer++;
386 while (Py_ISDIGIT(*buffer))
387 buffer++;
388 if (strncmp(buffer, decimal_point, decimal_point_len) == 0) {
389 *buffer = '.';
390 buffer++;
391 if (decimal_point_len > 1) {
392 /* buffer needs to get smaller */
393 size_t rest_len = strlen(buffer +
394 (decimal_point_len - 1));
395 memmove(buffer,
396 buffer + (decimal_point_len - 1),
397 rest_len);
398 buffer[rest_len] = 0;
399 }
400 }
401 }
Eric Smith0a950632008-04-30 01:09:30 +0000402}
403
Martin v. Löwis737ea822004-06-08 18:52:54 +0000404
Eric Smith7ef40bf2008-02-20 23:34:22 +0000405/* From the C99 standard, section 7.19.6:
406The exponent always contains at least two digits, and only as many more digits
407as necessary to represent the exponent.
408*/
409#define MIN_EXPONENT_DIGITS 2
410
Eric Smith0a950632008-04-30 01:09:30 +0000411/* Ensure that any exponent, if present, is at least MIN_EXPONENT_DIGITS
412 in length. */
413Py_LOCAL_INLINE(void)
Mark Dickinsone73cbe72009-04-26 19:54:55 +0000414ensure_minimum_exponent_length(char* buffer, size_t buf_size)
Eric Smith0a950632008-04-30 01:09:30 +0000415{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000416 char *p = strpbrk(buffer, "eE");
417 if (p && (*(p + 1) == '-' || *(p + 1) == '+')) {
418 char *start = p + 2;
419 int exponent_digit_cnt = 0;
420 int leading_zero_cnt = 0;
421 int in_leading_zeros = 1;
422 int significant_digit_cnt;
Eric Smith0a950632008-04-30 01:09:30 +0000423
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000424 /* Skip over the exponent and the sign. */
425 p += 2;
Eric Smith0a950632008-04-30 01:09:30 +0000426
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000427 /* Find the end of the exponent, keeping track of leading
428 zeros. */
429 while (*p && Py_ISDIGIT(*p)) {
430 if (in_leading_zeros && *p == '0')
431 ++leading_zero_cnt;
432 if (*p != '0')
433 in_leading_zeros = 0;
434 ++p;
435 ++exponent_digit_cnt;
436 }
Eric Smith0a950632008-04-30 01:09:30 +0000437
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000438 significant_digit_cnt = exponent_digit_cnt - leading_zero_cnt;
439 if (exponent_digit_cnt == MIN_EXPONENT_DIGITS) {
440 /* If there are 2 exactly digits, we're done,
441 regardless of what they contain */
442 }
443 else if (exponent_digit_cnt > MIN_EXPONENT_DIGITS) {
444 int extra_zeros_cnt;
Eric Smith0a950632008-04-30 01:09:30 +0000445
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000446 /* There are more than 2 digits in the exponent. See
447 if we can delete some of the leading zeros */
448 if (significant_digit_cnt < MIN_EXPONENT_DIGITS)
449 significant_digit_cnt = MIN_EXPONENT_DIGITS;
450 extra_zeros_cnt = exponent_digit_cnt -
451 significant_digit_cnt;
Eric Smith0a950632008-04-30 01:09:30 +0000452
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000453 /* Delete extra_zeros_cnt worth of characters from the
454 front of the exponent */
455 assert(extra_zeros_cnt >= 0);
Eric Smith0a950632008-04-30 01:09:30 +0000456
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000457 /* Add one to significant_digit_cnt to copy the
458 trailing 0 byte, thus setting the length */
459 memmove(start,
460 start + extra_zeros_cnt,
461 significant_digit_cnt + 1);
462 }
463 else {
464 /* If there are fewer than 2 digits, add zeros
465 until there are 2, if there's enough room */
466 int zeros = MIN_EXPONENT_DIGITS - exponent_digit_cnt;
467 if (start + zeros + exponent_digit_cnt + 1
468 < buffer + buf_size) {
469 memmove(start + zeros, start,
470 exponent_digit_cnt + 1);
471 memset(start, '0', zeros);
472 }
473 }
474 }
Eric Smith0a950632008-04-30 01:09:30 +0000475}
476
Mark Dickinson92fcc9c2009-04-29 20:41:00 +0000477/* Remove trailing zeros after the decimal point from a numeric string; also
478 remove the decimal point if all digits following it are zero. The numeric
479 string must end in '\0', and should not have any leading or trailing
480 whitespace. Assumes that the decimal point is '.'. */
Eric Smith0a950632008-04-30 01:09:30 +0000481Py_LOCAL_INLINE(void)
Mark Dickinson92fcc9c2009-04-29 20:41:00 +0000482remove_trailing_zeros(char *buffer)
Eric Smith0a950632008-04-30 01:09:30 +0000483{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000484 char *old_fraction_end, *new_fraction_end, *end, *p;
Mark Dickinson92fcc9c2009-04-29 20:41:00 +0000485
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000486 p = buffer;
487 if (*p == '-' || *p == '+')
488 /* Skip leading sign, if present */
489 ++p;
490 while (Py_ISDIGIT(*p))
491 ++p;
Mark Dickinson92fcc9c2009-04-29 20:41:00 +0000492
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000493 /* if there's no decimal point there's nothing to do */
494 if (*p++ != '.')
495 return;
Mark Dickinson92fcc9c2009-04-29 20:41:00 +0000496
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000497 /* scan any digits after the point */
498 while (Py_ISDIGIT(*p))
499 ++p;
500 old_fraction_end = p;
Mark Dickinson92fcc9c2009-04-29 20:41:00 +0000501
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000502 /* scan up to ending '\0' */
503 while (*p != '\0')
504 p++;
505 /* +1 to make sure that we move the null byte as well */
506 end = p+1;
Mark Dickinson92fcc9c2009-04-29 20:41:00 +0000507
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000508 /* scan back from fraction_end, looking for removable zeros */
509 p = old_fraction_end;
510 while (*(p-1) == '0')
511 --p;
512 /* and remove point if we've got that far */
513 if (*(p-1) == '.')
514 --p;
515 new_fraction_end = p;
Mark Dickinson92fcc9c2009-04-29 20:41:00 +0000516
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000517 memmove(new_fraction_end, old_fraction_end, end-old_fraction_end);
Mark Dickinson92fcc9c2009-04-29 20:41:00 +0000518}
519
520/* Ensure that buffer has a decimal point in it. The decimal point will not
521 be in the current locale, it will always be '.'. Don't add a decimal point
522 if an exponent is present. Also, convert to exponential notation where
523 adding a '.0' would produce too many significant digits (see issue 5864).
524
525 Returns a pointer to the fixed buffer, or NULL on failure.
526*/
527Py_LOCAL_INLINE(char *)
528ensure_decimal_point(char* buffer, size_t buf_size, int precision)
529{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000530 int digit_count, insert_count = 0, convert_to_exp = 0;
531 char *chars_to_insert, *digits_start;
Eric Smith0a950632008-04-30 01:09:30 +0000532
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000533 /* search for the first non-digit character */
534 char *p = buffer;
535 if (*p == '-' || *p == '+')
536 /* Skip leading sign, if present. I think this could only
537 ever be '-', but it can't hurt to check for both. */
538 ++p;
539 digits_start = p;
540 while (*p && Py_ISDIGIT(*p))
541 ++p;
542 digit_count = Py_SAFE_DOWNCAST(p - digits_start, Py_ssize_t, int);
Eric Smith0a950632008-04-30 01:09:30 +0000543
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000544 if (*p == '.') {
545 if (Py_ISDIGIT(*(p+1))) {
546 /* Nothing to do, we already have a decimal
547 point and a digit after it */
548 }
549 else {
550 /* We have a decimal point, but no following
551 digit. Insert a zero after the decimal. */
552 /* can't ever get here via PyOS_double_to_string */
553 assert(precision == -1);
554 ++p;
555 chars_to_insert = "0";
556 insert_count = 1;
557 }
558 }
559 else if (!(*p == 'e' || *p == 'E')) {
560 /* Don't add ".0" if we have an exponent. */
561 if (digit_count == precision) {
562 /* issue 5864: don't add a trailing .0 in the case
563 where the '%g'-formatted result already has as many
564 significant digits as were requested. Switch to
565 exponential notation instead. */
566 convert_to_exp = 1;
567 /* no exponent, no point, and we shouldn't land here
568 for infs and nans, so we must be at the end of the
569 string. */
570 assert(*p == '\0');
571 }
572 else {
573 assert(precision == -1 || digit_count < precision);
574 chars_to_insert = ".0";
575 insert_count = 2;
576 }
577 }
578 if (insert_count) {
579 size_t buf_len = strlen(buffer);
580 if (buf_len + insert_count + 1 >= buf_size) {
581 /* If there is not enough room in the buffer
582 for the additional text, just skip it. It's
583 not worth generating an error over. */
584 }
585 else {
586 memmove(p + insert_count, p,
587 buffer + strlen(buffer) - p + 1);
588 memcpy(p, chars_to_insert, insert_count);
589 }
590 }
591 if (convert_to_exp) {
592 int written;
593 size_t buf_avail;
594 p = digits_start;
595 /* insert decimal point */
596 assert(digit_count >= 1);
597 memmove(p+2, p+1, digit_count); /* safe, but overwrites nul */
598 p[1] = '.';
599 p += digit_count+1;
600 assert(p <= buf_size+buffer);
601 buf_avail = buf_size+buffer-p;
602 if (buf_avail == 0)
603 return NULL;
604 /* Add exponent. It's okay to use lower case 'e': we only
605 arrive here as a result of using the empty format code or
606 repr/str builtins and those never want an upper case 'E' */
607 written = PyOS_snprintf(p, buf_avail, "e%+.02d", digit_count-1);
608 if (!(0 <= written &&
609 written < Py_SAFE_DOWNCAST(buf_avail, size_t, int)))
610 /* output truncated, or something else bad happened */
611 return NULL;
612 remove_trailing_zeros(buffer);
613 }
614 return buffer;
Eric Smith0a950632008-04-30 01:09:30 +0000615}
616
Eric Smith7ef40bf2008-02-20 23:34:22 +0000617/* see FORMATBUFLEN in unicodeobject.c */
618#define FLOAT_FORMATBUFLEN 120
619
Martin v. Löwis737ea822004-06-08 18:52:54 +0000620/**
Mark Dickinson975d7572009-10-26 15:39:50 +0000621 * PyOS_ascii_formatd:
Martin v. Löwis737ea822004-06-08 18:52:54 +0000622 * @buffer: A buffer to place the resulting string in
Eric Smith8113ca62008-03-17 11:01:01 +0000623 * @buf_size: The length of the buffer.
Martin v. Löwis737ea822004-06-08 18:52:54 +0000624 * @format: The printf()-style format to use for the
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000625 * code to use for converting.
Martin v. Löwis737ea822004-06-08 18:52:54 +0000626 * @d: The #gdouble to convert
627 *
628 * Converts a #gdouble to a string, using the '.' as
629 * decimal point. To format the number you pass in
630 * a printf()-style format string. Allowed conversion
Eric Smithaca19e62009-04-22 13:29:05 +0000631 * specifiers are 'e', 'E', 'f', 'F', 'g', 'G', and 'Z'.
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000632 *
Eric Smith8113ca62008-03-17 11:01:01 +0000633 * 'Z' is the same as 'g', except it always has a decimal and
634 * at least one digit after the decimal.
Eric Smith7ef40bf2008-02-20 23:34:22 +0000635 *
Martin v. Löwis737ea822004-06-08 18:52:54 +0000636 * Return value: The pointer to the buffer with the converted string.
Mark Dickinson92fcc9c2009-04-29 20:41:00 +0000637 * On failure returns NULL but does not set any Python exception.
Martin v. Löwis737ea822004-06-08 18:52:54 +0000638 **/
Mark Dickinson975d7572009-10-26 15:39:50 +0000639char *
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000640_PyOS_ascii_formatd(char *buffer,
641 size_t buf_size,
642 const char *format,
643 double d,
644 int precision)
Martin v. Löwis737ea822004-06-08 18:52:54 +0000645{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000646 char format_char;
647 size_t format_len = strlen(format);
Eric Smith7ef40bf2008-02-20 23:34:22 +0000648
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000649 /* Issue 2264: code 'Z' requires copying the format. 'Z' is 'g', but
650 also with at least one character past the decimal. */
651 char tmp_format[FLOAT_FORMATBUFLEN];
Martin v. Löwis737ea822004-06-08 18:52:54 +0000652
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000653 /* The last character in the format string must be the format char */
654 format_char = format[format_len - 1];
Martin v. Löwis737ea822004-06-08 18:52:54 +0000655
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000656 if (format[0] != '%')
657 return NULL;
Martin v. Löwis737ea822004-06-08 18:52:54 +0000658
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000659 /* I'm not sure why this test is here. It's ensuring that the format
660 string after the first character doesn't have a single quote, a
661 lowercase l, or a percent. This is the reverse of the commented-out
662 test about 10 lines ago. */
663 if (strpbrk(format + 1, "'l%"))
664 return NULL;
Martin v. Löwis737ea822004-06-08 18:52:54 +0000665
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000666 /* Also curious about this function is that it accepts format strings
667 like "%xg", which are invalid for floats. In general, the
668 interface to this function is not very good, but changing it is
669 difficult because it's a public API. */
Eric Smith8113ca62008-03-17 11:01:01 +0000670
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000671 if (!(format_char == 'e' || format_char == 'E' ||
672 format_char == 'f' || format_char == 'F' ||
673 format_char == 'g' || format_char == 'G' ||
674 format_char == 'Z'))
675 return NULL;
Martin v. Löwis737ea822004-06-08 18:52:54 +0000676
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000677 /* Map 'Z' format_char to 'g', by copying the format string and
678 replacing the final char with a 'g' */
679 if (format_char == 'Z') {
680 if (format_len + 1 >= sizeof(tmp_format)) {
681 /* The format won't fit in our copy. Error out. In
682 practice, this will never happen and will be
683 detected by returning NULL */
684 return NULL;
685 }
686 strcpy(tmp_format, format);
687 tmp_format[format_len - 1] = 'g';
688 format = tmp_format;
689 }
Martin v. Löwis737ea822004-06-08 18:52:54 +0000690
Eric Smith8113ca62008-03-17 11:01:01 +0000691
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000692 /* Have PyOS_snprintf do the hard work */
693 PyOS_snprintf(buffer, buf_size, format, d);
Martin v. Löwis737ea822004-06-08 18:52:54 +0000694
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000695 /* Do various fixups on the return string */
Martin v. Löwis737ea822004-06-08 18:52:54 +0000696
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000697 /* Get the current locale, and find the decimal point string.
698 Convert that string back to a dot. */
699 change_decimal_from_locale_to_dot(buffer);
Eric Smith7ef40bf2008-02-20 23:34:22 +0000700
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000701 /* If an exponent exists, ensure that the exponent is at least
702 MIN_EXPONENT_DIGITS digits, providing the buffer is large enough
703 for the extra zeros. Also, if there are more than
704 MIN_EXPONENT_DIGITS, remove as many zeros as possible until we get
705 back to MIN_EXPONENT_DIGITS */
706 ensure_minimum_exponent_length(buffer, buf_size);
Martin v. Löwis737ea822004-06-08 18:52:54 +0000707
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000708 /* If format_char is 'Z', make sure we have at least one character
709 after the decimal point (and make sure we have a decimal point);
710 also switch to exponential notation in some edge cases where the
711 extra character would produce more significant digits that we
712 really want. */
713 if (format_char == 'Z')
714 buffer = ensure_decimal_point(buffer, buf_size, precision);
Eric Smith8113ca62008-03-17 11:01:01 +0000715
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000716 return buffer;
Martin v. Löwis737ea822004-06-08 18:52:54 +0000717}
718
Mark Dickinson975d7572009-10-26 15:39:50 +0000719char *
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000720PyOS_ascii_formatd(char *buffer,
721 size_t buf_size,
722 const char *format,
723 double d)
Mark Dickinson975d7572009-10-26 15:39:50 +0000724{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000725 if (PyErr_WarnEx(PyExc_DeprecationWarning,
726 "PyOS_ascii_formatd is deprecated, "
727 "use PyOS_double_to_string instead", 1) < 0)
728 return NULL;
Mark Dickinsondf108ca2009-04-29 21:56:53 +0000729
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000730 return _PyOS_ascii_formatd(buffer, buf_size, format, d, -1);
Mark Dickinson975d7572009-10-26 15:39:50 +0000731}
Mark Dickinsondf108ca2009-04-29 21:56:53 +0000732
Mark Dickinson975d7572009-10-26 15:39:50 +0000733#ifdef PY_NO_SHORT_FLOAT_REPR
734
735/* The fallback code to use if _Py_dg_dtoa is not available. */
736
Eric Smith068f0652009-04-25 21:40:15 +0000737PyAPI_FUNC(char *) PyOS_double_to_string(double val,
738 char format_code,
739 int precision,
740 int flags,
Mark Dickinson975d7572009-10-26 15:39:50 +0000741 int *type)
Eric Smith068f0652009-04-25 21:40:15 +0000742{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000743 char format[32];
744 Py_ssize_t bufsize;
745 char *buf;
746 int t, exp;
747 int upper = 0;
Eric Smith068f0652009-04-25 21:40:15 +0000748
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000749 /* Validate format_code, and map upper and lower case */
750 switch (format_code) {
751 case 'e': /* exponent */
752 case 'f': /* fixed */
753 case 'g': /* general */
754 break;
755 case 'E':
756 upper = 1;
757 format_code = 'e';
758 break;
759 case 'F':
760 upper = 1;
761 format_code = 'f';
762 break;
763 case 'G':
764 upper = 1;
765 format_code = 'g';
766 break;
767 case 'r': /* repr format */
768 /* Supplied precision is unused, must be 0. */
769 if (precision != 0) {
770 PyErr_BadInternalCall();
771 return NULL;
772 }
773 /* The repr() precision (17 significant decimal digits) is the
774 minimal number that is guaranteed to have enough precision
775 so that if the number is read back in the exact same binary
776 value is recreated. This is true for IEEE floating point
777 by design, and also happens to work for all other modern
778 hardware. */
779 precision = 17;
780 format_code = 'g';
781 break;
782 default:
783 PyErr_BadInternalCall();
784 return NULL;
785 }
Eric Smith068f0652009-04-25 21:40:15 +0000786
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000787 /* Here's a quick-and-dirty calculation to figure out how big a buffer
788 we need. In general, for a finite float we need:
Mark Dickinson975d7572009-10-26 15:39:50 +0000789
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000790 1 byte for each digit of the decimal significand, and
Mark Dickinson975d7572009-10-26 15:39:50 +0000791
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000792 1 for a possible sign
793 1 for a possible decimal point
794 2 for a possible [eE][+-]
795 1 for each digit of the exponent; if we allow 19 digits
796 total then we're safe up to exponents of 2**63.
797 1 for the trailing nul byte
Mark Dickinson975d7572009-10-26 15:39:50 +0000798
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000799 This gives a total of 24 + the number of digits in the significand,
800 and the number of digits in the significand is:
Mark Dickinson975d7572009-10-26 15:39:50 +0000801
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000802 for 'g' format: at most precision, except possibly
803 when precision == 0, when it's 1.
804 for 'e' format: precision+1
805 for 'f' format: precision digits after the point, at least 1
806 before. To figure out how many digits appear before the point
807 we have to examine the size of the number. If fabs(val) < 1.0
808 then there will be only one digit before the point. If
809 fabs(val) >= 1.0, then there are at most
Mark Dickinson975d7572009-10-26 15:39:50 +0000810
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000811 1+floor(log10(ceiling(fabs(val))))
Mark Dickinson975d7572009-10-26 15:39:50 +0000812
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000813 digits before the point (where the 'ceiling' allows for the
814 possibility that the rounding rounds the integer part of val
815 up). A safe upper bound for the above quantity is
816 1+floor(exp/3), where exp is the unique integer such that 0.5
817 <= fabs(val)/2**exp < 1.0. This exp can be obtained from
818 frexp.
Mark Dickinson975d7572009-10-26 15:39:50 +0000819
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000820 So we allow room for precision+1 digits for all formats, plus an
821 extra floor(exp/3) digits for 'f' format.
Mark Dickinson975d7572009-10-26 15:39:50 +0000822
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000823 */
Mark Dickinson975d7572009-10-26 15:39:50 +0000824
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000825 if (Py_IS_NAN(val) || Py_IS_INFINITY(val))
826 /* 3 for 'inf'/'nan', 1 for sign, 1 for '\0' */
827 bufsize = 5;
828 else {
829 bufsize = 25 + precision;
830 if (format_code == 'f' && fabs(val) >= 1.0) {
831 frexp(val, &exp);
832 bufsize += exp/3;
833 }
834 }
Mark Dickinson975d7572009-10-26 15:39:50 +0000835
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000836 buf = PyMem_Malloc(bufsize);
837 if (buf == NULL) {
838 PyErr_NoMemory();
839 return NULL;
840 }
Eric Smithaca19e62009-04-22 13:29:05 +0000841
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000842 /* Handle nan and inf. */
843 if (Py_IS_NAN(val)) {
844 strcpy(buf, "nan");
845 t = Py_DTST_NAN;
846 } else if (Py_IS_INFINITY(val)) {
847 if (copysign(1., val) == 1.)
848 strcpy(buf, "inf");
849 else
850 strcpy(buf, "-inf");
851 t = Py_DTST_INFINITE;
852 } else {
853 t = Py_DTST_FINITE;
854 if (flags & Py_DTSF_ADD_DOT_0)
855 format_code = 'Z';
Mark Dickinson975d7572009-10-26 15:39:50 +0000856
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000857 PyOS_snprintf(format, sizeof(format), "%%%s.%i%c",
858 (flags & Py_DTSF_ALT ? "#" : ""), precision,
859 format_code);
860 _PyOS_ascii_formatd(buf, bufsize, format, val, precision);
861 }
Mark Dickinson975d7572009-10-26 15:39:50 +0000862
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000863 /* Add sign when requested. It's convenient (esp. when formatting
864 complex numbers) to include a sign even for inf and nan. */
865 if (flags & Py_DTSF_SIGN && buf[0] != '-') {
866 size_t len = strlen(buf);
867 /* the bufsize calculations above should ensure that we've got
868 space to add a sign */
869 assert((size_t)bufsize >= len+2);
870 memmove(buf+1, buf, len+1);
871 buf[0] = '+';
872 }
873 if (upper) {
874 /* Convert to upper case. */
875 char *p1;
876 for (p1 = buf; *p1; p1++)
877 *p1 = Py_TOUPPER(*p1);
878 }
Mark Dickinson975d7572009-10-26 15:39:50 +0000879
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000880 if (type)
881 *type = t;
882 return buf;
Martin v. Löwis737ea822004-06-08 18:52:54 +0000883}
Mark Dickinson975d7572009-10-26 15:39:50 +0000884
885#else
886
887/* _Py_dg_dtoa is available. */
888
/* Case selection is done with a pair of parallel lookup tables, which
   avoids having to invent a locale-independent way of converting to
   upper case.  The OFS_* values index into either table. */
#define OFS_INF 0   /* spelling of infinity */
#define OFS_NAN 1   /* spelling of not-a-number */
#define OFS_E 2     /* exponent marker */

/* The code below knows the lengths of these strings, so don't change
   them. */
static char *lc_float_strings[] = { "inf", "nan", "e" };
static char *uc_float_strings[] = { "INF", "NAN", "E" };
906
907
908/* Convert a double d to a string, and return a PyMem_Malloc'd block of
909 memory contain the resulting string.
910
911 Arguments:
912 d is the double to be converted
913 format_code is one of 'e', 'f', 'g', 'r'. 'e', 'f' and 'g'
914 correspond to '%e', '%f' and '%g'; 'r' corresponds to repr.
915 mode is one of '0', '2' or '3', and is completely determined by
916 format_code: 'e' and 'g' use mode 2; 'f' mode 3, 'r' mode 0.
917 precision is the desired precision
918 always_add_sign is nonzero if a '+' sign should be included for positive
919 numbers
920 add_dot_0_if_integer is nonzero if integers in non-exponential form
921 should have ".0" added. Only applies to format codes 'r' and 'g'.
922 use_alt_formatting is nonzero if alternative formatting should be
923 used. Only applies to format codes 'e', 'f' and 'g'. For code 'g',
924 at most one of use_alt_formatting and add_dot_0_if_integer should
925 be nonzero.
926 type, if non-NULL, will be set to one of these constants to identify
927 the type of the 'd' argument:
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000928 Py_DTST_FINITE
929 Py_DTST_INFINITE
930 Py_DTST_NAN
Mark Dickinson975d7572009-10-26 15:39:50 +0000931
932 Returns a PyMem_Malloc'd block of memory containing the resulting string,
933 or NULL on error. If NULL is returned, the Python error has been set.
934 */
935
936static char *
937format_float_short(double d, char format_code,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000938 int mode, Py_ssize_t precision,
939 int always_add_sign, int add_dot_0_if_integer,
940 int use_alt_formatting, char **float_strings, int *type)
Mark Dickinson975d7572009-10-26 15:39:50 +0000941{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000942 char *buf = NULL;
943 char *p = NULL;
944 Py_ssize_t bufsize = 0;
945 char *digits, *digits_end;
946 int decpt_as_int, sign, exp_len, exp = 0, use_exp = 0;
947 Py_ssize_t decpt, digits_len, vdigits_start, vdigits_end;
948 _Py_SET_53BIT_PRECISION_HEADER;
Mark Dickinson975d7572009-10-26 15:39:50 +0000949
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000950 /* _Py_dg_dtoa returns a digit string (no decimal point or exponent).
951 Must be matched by a call to _Py_dg_freedtoa. */
952 _Py_SET_53BIT_PRECISION_START;
953 digits = _Py_dg_dtoa(d, mode, precision, &decpt_as_int, &sign,
954 &digits_end);
955 _Py_SET_53BIT_PRECISION_END;
Mark Dickinson975d7572009-10-26 15:39:50 +0000956
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000957 decpt = (Py_ssize_t)decpt_as_int;
958 if (digits == NULL) {
959 /* The only failure mode is no memory. */
960 PyErr_NoMemory();
961 goto exit;
962 }
963 assert(digits_end != NULL && digits_end >= digits);
964 digits_len = digits_end - digits;
Mark Dickinson975d7572009-10-26 15:39:50 +0000965
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000966 if (digits_len && !Py_ISDIGIT(digits[0])) {
967 /* Infinities and nans here; adapt Gay's output,
968 so convert Infinity to inf and NaN to nan, and
969 ignore sign of nan. Then return. */
Mark Dickinson975d7572009-10-26 15:39:50 +0000970
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000971 /* ignore the actual sign of a nan */
972 if (digits[0] == 'n' || digits[0] == 'N')
973 sign = 0;
Mark Dickinson975d7572009-10-26 15:39:50 +0000974
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000975 /* We only need 5 bytes to hold the result "+inf\0" . */
976 bufsize = 5; /* Used later in an assert. */
977 buf = (char *)PyMem_Malloc(bufsize);
978 if (buf == NULL) {
979 PyErr_NoMemory();
980 goto exit;
981 }
982 p = buf;
Mark Dickinson975d7572009-10-26 15:39:50 +0000983
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000984 if (sign == 1) {
985 *p++ = '-';
986 }
987 else if (always_add_sign) {
988 *p++ = '+';
989 }
990 if (digits[0] == 'i' || digits[0] == 'I') {
991 strncpy(p, float_strings[OFS_INF], 3);
992 p += 3;
Mark Dickinson975d7572009-10-26 15:39:50 +0000993
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000994 if (type)
995 *type = Py_DTST_INFINITE;
996 }
997 else if (digits[0] == 'n' || digits[0] == 'N') {
998 strncpy(p, float_strings[OFS_NAN], 3);
999 p += 3;
Mark Dickinson975d7572009-10-26 15:39:50 +00001000
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001001 if (type)
1002 *type = Py_DTST_NAN;
1003 }
1004 else {
1005 /* shouldn't get here: Gay's code should always return
1006 something starting with a digit, an 'I', or 'N' */
1007 strncpy(p, "ERR", 3);
1008 p += 3;
1009 assert(0);
1010 }
1011 goto exit;
1012 }
Mark Dickinson975d7572009-10-26 15:39:50 +00001013
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001014 /* The result must be finite (not inf or nan). */
1015 if (type)
1016 *type = Py_DTST_FINITE;
Mark Dickinson975d7572009-10-26 15:39:50 +00001017
1018
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001019 /* We got digits back, format them. We may need to pad 'digits'
1020 either on the left or right (or both) with extra zeros, so in
1021 general the resulting string has the form
Mark Dickinson975d7572009-10-26 15:39:50 +00001022
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001023 [<sign>]<zeros><digits><zeros>[<exponent>]
Mark Dickinson975d7572009-10-26 15:39:50 +00001024
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001025 where either of the <zeros> pieces could be empty, and there's a
1026 decimal point that could appear either in <digits> or in the
1027 leading or trailing <zeros>.
Mark Dickinson975d7572009-10-26 15:39:50 +00001028
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001029 Imagine an infinite 'virtual' string vdigits, consisting of the
1030 string 'digits' (starting at index 0) padded on both the left and
1031 right with infinite strings of zeros. We want to output a slice
Mark Dickinson975d7572009-10-26 15:39:50 +00001032
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001033 vdigits[vdigits_start : vdigits_end]
Mark Dickinson975d7572009-10-26 15:39:50 +00001034
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001035 of this virtual string. Thus if vdigits_start < 0 then we'll end
1036 up producing some leading zeros; if vdigits_end > digits_len there
1037 will be trailing zeros in the output. The next section of code
1038 determines whether to use an exponent or not, figures out the
1039 position 'decpt' of the decimal point, and computes 'vdigits_start'
1040 and 'vdigits_end'. */
1041 vdigits_end = digits_len;
1042 switch (format_code) {
1043 case 'e':
1044 use_exp = 1;
1045 vdigits_end = precision;
1046 break;
1047 case 'f':
1048 vdigits_end = decpt + precision;
1049 break;
1050 case 'g':
1051 if (decpt <= -4 || decpt >
1052 (add_dot_0_if_integer ? precision-1 : precision))
1053 use_exp = 1;
1054 if (use_alt_formatting)
1055 vdigits_end = precision;
1056 break;
1057 case 'r':
1058 /* convert to exponential format at 1e16. We used to convert
1059 at 1e17, but that gives odd-looking results for some values
1060 when a 16-digit 'shortest' repr is padded with bogus zeros.
1061 For example, repr(2e16+8) would give 20000000000000010.0;
1062 the true value is 20000000000000008.0. */
1063 if (decpt <= -4 || decpt > 16)
1064 use_exp = 1;
1065 break;
1066 default:
1067 PyErr_BadInternalCall();
1068 goto exit;
1069 }
Mark Dickinson975d7572009-10-26 15:39:50 +00001070
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001071 /* if using an exponent, reset decimal point position to 1 and adjust
1072 exponent accordingly.*/
1073 if (use_exp) {
1074 exp = decpt - 1;
1075 decpt = 1;
1076 }
1077 /* ensure vdigits_start < decpt <= vdigits_end, or vdigits_start <
1078 decpt < vdigits_end if add_dot_0_if_integer and no exponent */
1079 vdigits_start = decpt <= 0 ? decpt-1 : 0;
1080 if (!use_exp && add_dot_0_if_integer)
1081 vdigits_end = vdigits_end > decpt ? vdigits_end : decpt + 1;
1082 else
1083 vdigits_end = vdigits_end > decpt ? vdigits_end : decpt;
Mark Dickinson975d7572009-10-26 15:39:50 +00001084
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001085 /* double check inequalities */
1086 assert(vdigits_start <= 0 &&
1087 0 <= digits_len &&
1088 digits_len <= vdigits_end);
1089 /* decimal point should be in (vdigits_start, vdigits_end] */
1090 assert(vdigits_start < decpt && decpt <= vdigits_end);
Mark Dickinson975d7572009-10-26 15:39:50 +00001091
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001092 /* Compute an upper bound how much memory we need. This might be a few
1093 chars too long, but no big deal. */
1094 bufsize =
1095 /* sign, decimal point and trailing 0 byte */
1096 3 +
Mark Dickinson975d7572009-10-26 15:39:50 +00001097
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001098 /* total digit count (including zero padding on both sides) */
1099 (vdigits_end - vdigits_start) +
Mark Dickinson975d7572009-10-26 15:39:50 +00001100
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001101 /* exponent "e+100", max 3 numerical digits */
1102 (use_exp ? 5 : 0);
Mark Dickinson975d7572009-10-26 15:39:50 +00001103
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001104 /* Now allocate the memory and initialize p to point to the start of
1105 it. */
1106 buf = (char *)PyMem_Malloc(bufsize);
1107 if (buf == NULL) {
1108 PyErr_NoMemory();
1109 goto exit;
1110 }
1111 p = buf;
Mark Dickinson975d7572009-10-26 15:39:50 +00001112
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001113 /* Add a negative sign if negative, and a plus sign if non-negative
1114 and always_add_sign is true. */
1115 if (sign == 1)
1116 *p++ = '-';
1117 else if (always_add_sign)
1118 *p++ = '+';
Mark Dickinson975d7572009-10-26 15:39:50 +00001119
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001120 /* note that exactly one of the three 'if' conditions is true,
1121 so we include exactly one decimal point */
1122 /* Zero padding on left of digit string */
1123 if (decpt <= 0) {
1124 memset(p, '0', decpt-vdigits_start);
1125 p += decpt - vdigits_start;
1126 *p++ = '.';
1127 memset(p, '0', 0-decpt);
1128 p += 0-decpt;
1129 }
1130 else {
1131 memset(p, '0', 0-vdigits_start);
1132 p += 0 - vdigits_start;
1133 }
Mark Dickinson975d7572009-10-26 15:39:50 +00001134
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001135 /* Digits, with included decimal point */
1136 if (0 < decpt && decpt <= digits_len) {
1137 strncpy(p, digits, decpt-0);
1138 p += decpt-0;
1139 *p++ = '.';
1140 strncpy(p, digits+decpt, digits_len-decpt);
1141 p += digits_len-decpt;
1142 }
1143 else {
1144 strncpy(p, digits, digits_len);
1145 p += digits_len;
1146 }
Mark Dickinson975d7572009-10-26 15:39:50 +00001147
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001148 /* And zeros on the right */
1149 if (digits_len < decpt) {
1150 memset(p, '0', decpt-digits_len);
1151 p += decpt-digits_len;
1152 *p++ = '.';
1153 memset(p, '0', vdigits_end-decpt);
1154 p += vdigits_end-decpt;
1155 }
1156 else {
1157 memset(p, '0', vdigits_end-digits_len);
1158 p += vdigits_end-digits_len;
1159 }
Mark Dickinson975d7572009-10-26 15:39:50 +00001160
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001161 /* Delete a trailing decimal pt unless using alternative formatting. */
1162 if (p[-1] == '.' && !use_alt_formatting)
1163 p--;
Mark Dickinson975d7572009-10-26 15:39:50 +00001164
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001165 /* Now that we've done zero padding, add an exponent if needed. */
1166 if (use_exp) {
1167 *p++ = float_strings[OFS_E][0];
1168 exp_len = sprintf(p, "%+.02d", exp);
1169 p += exp_len;
1170 }
Mark Dickinson975d7572009-10-26 15:39:50 +00001171 exit:
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001172 if (buf) {
1173 *p = '\0';
1174 /* It's too late if this fails, as we've already stepped on
1175 memory that isn't ours. But it's an okay debugging test. */
1176 assert(p-buf < bufsize);
1177 }
1178 if (digits)
1179 _Py_dg_freedtoa(digits);
Mark Dickinson975d7572009-10-26 15:39:50 +00001180
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001181 return buf;
Mark Dickinson975d7572009-10-26 15:39:50 +00001182}
1183
1184
1185PyAPI_FUNC(char *) PyOS_double_to_string(double val,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001186 char format_code,
1187 int precision,
1188 int flags,
1189 int *type)
Mark Dickinson975d7572009-10-26 15:39:50 +00001190{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001191 char **float_strings = lc_float_strings;
1192 int mode;
Mark Dickinson975d7572009-10-26 15:39:50 +00001193
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001194 /* Validate format_code, and map upper and lower case. Compute the
1195 mode and make any adjustments as needed. */
1196 switch (format_code) {
1197 /* exponent */
1198 case 'E':
1199 float_strings = uc_float_strings;
1200 format_code = 'e';
1201 /* Fall through. */
1202 case 'e':
1203 mode = 2;
1204 precision++;
1205 break;
Mark Dickinson975d7572009-10-26 15:39:50 +00001206
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001207 /* fixed */
1208 case 'F':
1209 float_strings = uc_float_strings;
1210 format_code = 'f';
1211 /* Fall through. */
1212 case 'f':
1213 mode = 3;
1214 break;
Mark Dickinson975d7572009-10-26 15:39:50 +00001215
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001216 /* general */
1217 case 'G':
1218 float_strings = uc_float_strings;
1219 format_code = 'g';
1220 /* Fall through. */
1221 case 'g':
1222 mode = 2;
1223 /* precision 0 makes no sense for 'g' format; interpret as 1 */
1224 if (precision == 0)
1225 precision = 1;
1226 break;
Mark Dickinson975d7572009-10-26 15:39:50 +00001227
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001228 /* repr format */
1229 case 'r':
1230 mode = 0;
1231 /* Supplied precision is unused, must be 0. */
1232 if (precision != 0) {
1233 PyErr_BadInternalCall();
1234 return NULL;
1235 }
1236 break;
Mark Dickinson975d7572009-10-26 15:39:50 +00001237
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001238 default:
1239 PyErr_BadInternalCall();
1240 return NULL;
1241 }
Mark Dickinson975d7572009-10-26 15:39:50 +00001242
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001243 return format_float_short(val, format_code, mode, precision,
1244 flags & Py_DTSF_SIGN,
1245 flags & Py_DTSF_ADD_DOT_0,
1246 flags & Py_DTSF_ALT,
1247 float_strings, type);
Mark Dickinson975d7572009-10-26 15:39:50 +00001248}
1249#endif /* ifdef PY_NO_SHORT_FLOAT_REPR */