blob: 6d1e7edf88e85647ecf8ff2bb68440d87138c68f [file] [log] [blame]
Martin v. Löwis737ea822004-06-08 18:52:54 +00001/* -*- Mode: C; c-file-style: "python" -*- */
2
3#include <Python.h>
4#include <locale.h>
5
Martin v. Löwis737ea822004-06-08 18:52:54 +00006/**
7 * PyOS_ascii_strtod:
8 * @nptr: the string to convert to a numeric value.
9 * @endptr: if non-%NULL, it returns the character after
10 * the last character used in the conversion.
11 *
 * Converts a string to a double value.
13 * This function behaves like the standard strtod() function
14 * does in the C locale. It does this without actually
15 * changing the current locale, since that would not be
16 * thread-safe.
17 *
18 * This function is typically used when reading configuration
19 * files or other non-user input that should be locale independent.
20 * To handle input from the user you should normally use the
21 * locale-sensitive system strtod() function.
22 *
23 * If the correct value would cause overflow, plus or minus %HUGE_VAL
24 * is returned (according to the sign of the value), and %ERANGE is
25 * stored in %errno. If the correct value would cause underflow,
26 * zero is returned and %ERANGE is stored in %errno.
Thomas Wouters4d70c3d2006-06-08 14:42:34 +000027 * If memory allocation fails, %ENOMEM is stored in %errno.
Martin v. Löwis737ea822004-06-08 18:52:54 +000028 *
29 * This function resets %errno before calling strtod() so that
30 * you can reliably detect overflow and underflow.
31 *
 * Return value: the double value.
33 **/
Eric Smith0923d1d2009-04-16 20:16:10 +000034
35#ifndef PY_NO_SHORT_FLOAT_REPR
36
37double
38PyOS_ascii_strtod(const char *nptr, char **endptr)
39{
40 double result;
41 _Py_SET_53BIT_PRECISION_HEADER;
42
43 assert(nptr != NULL);
44 /* Set errno to zero, so that we can distinguish zero results
45 and underflows */
46 errno = 0;
47
48 _Py_SET_53BIT_PRECISION_START;
49 result = _Py_dg_strtod(nptr, endptr);
50 _Py_SET_53BIT_PRECISION_END;
51
52 return result;
53
54}
55
56#else
57
58/*
59 Use system strtod; since strtod is locale aware, we may
60 have to first fix the decimal separator.
61
62 Note that unlike _Py_dg_strtod, the system strtod may not always give
63 correctly rounded results.
64*/
65
Martin v. Löwis737ea822004-06-08 18:52:54 +000066double
Neal Norwitze7214a12005-12-18 05:03:17 +000067PyOS_ascii_strtod(const char *nptr, char **endptr)
Martin v. Löwis737ea822004-06-08 18:52:54 +000068{
69 char *fail_pos;
Neal Norwitz0e7a0ed2005-12-18 05:37:36 +000070 double val = -1.0;
Martin v. Löwis737ea822004-06-08 18:52:54 +000071 struct lconv *locale_data;
72 const char *decimal_point;
Neal Norwitzd39d8612006-01-08 01:03:36 +000073 size_t decimal_point_len;
Martin v. Löwis737ea822004-06-08 18:52:54 +000074 const char *p, *decimal_point_pos;
75 const char *end = NULL; /* Silence gcc */
Christian Heimesfaf2f632008-01-06 16:59:19 +000076 const char *digits_pos = NULL;
77 int negate = 0;
Martin v. Löwis737ea822004-06-08 18:52:54 +000078
Martin v. Löwis737ea822004-06-08 18:52:54 +000079 assert(nptr != NULL);
80
81 fail_pos = NULL;
82
83 locale_data = localeconv();
84 decimal_point = locale_data->decimal_point;
85 decimal_point_len = strlen(decimal_point);
86
87 assert(decimal_point_len != 0);
88
89 decimal_point_pos = NULL;
Christian Heimesfaf2f632008-01-06 16:59:19 +000090
Mark Dickinson6d65df12009-04-26 15:30:47 +000091 /* Set errno to zero, so that we can distinguish zero results
92 and underflows */
93 errno = 0;
94
Christian Heimesfaf2f632008-01-06 16:59:19 +000095 /* We process any leading whitespace and the optional sign manually,
96 then pass the remainder to the system strtod. This ensures that
97 the result of an underflow has the correct sign. (bug #1725) */
98
99 p = nptr;
100 /* Skip leading space */
Eric Smith6dc46f52009-04-27 20:39:49 +0000101 while (Py_ISSPACE(*p))
Christian Heimesfaf2f632008-01-06 16:59:19 +0000102 p++;
103
104 /* Process leading sign, if present */
105 if (*p == '-') {
106 negate = 1;
107 p++;
Mark Dickinson6d65df12009-04-26 15:30:47 +0000108 }
109 else if (*p == '+') {
Christian Heimesfaf2f632008-01-06 16:59:19 +0000110 p++;
111 }
112
Mark Dickinson6d65df12009-04-26 15:30:47 +0000113 /* Parse infinities and nans */
114 if (*p == 'i' || *p == 'I') {
115 if (PyOS_strnicmp(p, "inf", 3) == 0) {
116 val = Py_HUGE_VAL;
117 if (PyOS_strnicmp(p+3, "inity", 5) == 0)
118 fail_pos = (char *)p+8;
119 else
120 fail_pos = (char *)p+3;
121 goto got_val;
122 }
123 else
124 goto invalid_string;
Christian Heimesfaf2f632008-01-06 16:59:19 +0000125 }
Mark Dickinson6d65df12009-04-26 15:30:47 +0000126#ifdef Py_NAN
127 if (*p == 'n' || *p == 'N') {
128 if (PyOS_strnicmp(p, "nan", 3) == 0) {
129 val = Py_NAN;
130 fail_pos = (char *)p+3;
131 goto got_val;
132 }
133 else
134 goto invalid_string;
135 }
136#endif
Christian Heimesfaf2f632008-01-06 16:59:19 +0000137
Mark Dickinson6d65df12009-04-26 15:30:47 +0000138 /* Some platform strtods accept hex floats; Python shouldn't (at the
139 moment), so we check explicitly for strings starting with '0x'. */
140 if (*p == '0' && (*(p+1) == 'x' || *(p+1) == 'X'))
141 goto invalid_string;
142
143 /* Check that what's left begins with a digit or decimal point */
Eric Smith6dc46f52009-04-27 20:39:49 +0000144 if (!Py_ISDIGIT(*p) && *p != '.')
Mark Dickinson6d65df12009-04-26 15:30:47 +0000145 goto invalid_string;
146
147 digits_pos = p;
148 if (decimal_point[0] != '.' ||
Martin v. Löwis737ea822004-06-08 18:52:54 +0000149 decimal_point[1] != 0)
150 {
Mark Dickinson6d65df12009-04-26 15:30:47 +0000151 /* Look for a '.' in the input; if present, it'll need to be
152 swapped for the current locale's decimal point before we
153 call strtod. On the other hand, if we find the current
154 locale's decimal point then the input is invalid. */
Eric Smith6dc46f52009-04-27 20:39:49 +0000155 while (Py_ISDIGIT(*p))
Neal Norwitze7214a12005-12-18 05:03:17 +0000156 p++;
157
158 if (*p == '.')
Martin v. Löwis737ea822004-06-08 18:52:54 +0000159 {
Neal Norwitze7214a12005-12-18 05:03:17 +0000160 decimal_point_pos = p++;
Martin v. Löwis737ea822004-06-08 18:52:54 +0000161
Mark Dickinson6d65df12009-04-26 15:30:47 +0000162 /* locate end of number */
Eric Smith6dc46f52009-04-27 20:39:49 +0000163 while (Py_ISDIGIT(*p))
Martin v. Löwis737ea822004-06-08 18:52:54 +0000164 p++;
165
Neal Norwitze7214a12005-12-18 05:03:17 +0000166 if (*p == 'e' || *p == 'E')
167 p++;
168 if (*p == '+' || *p == '-')
169 p++;
Eric Smith6dc46f52009-04-27 20:39:49 +0000170 while (Py_ISDIGIT(*p))
Neal Norwitze7214a12005-12-18 05:03:17 +0000171 p++;
172 end = p;
Martin v. Löwis737ea822004-06-08 18:52:54 +0000173 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000174 else if (strncmp(p, decimal_point, decimal_point_len) == 0)
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000175 /* Python bug #1417699 */
Mark Dickinson6d65df12009-04-26 15:30:47 +0000176 goto invalid_string;
Christian Heimesb186d002008-03-18 15:15:01 +0000177 /* For the other cases, we need not convert the decimal
178 point */
Martin v. Löwis737ea822004-06-08 18:52:54 +0000179 }
180
Mark Dickinson6d65df12009-04-26 15:30:47 +0000181 if (decimal_point_pos) {
Martin v. Löwis737ea822004-06-08 18:52:54 +0000182 char *copy, *c;
Mark Dickinson6d65df12009-04-26 15:30:47 +0000183 /* Create a copy of the input, with the '.' converted to the
184 locale-specific decimal point */
Christian Heimesfaf2f632008-01-06 16:59:19 +0000185 copy = (char *)PyMem_MALLOC(end - digits_pos +
186 1 + decimal_point_len);
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000187 if (copy == NULL) {
188 if (endptr)
189 *endptr = (char *)nptr;
190 errno = ENOMEM;
191 return val;
192 }
Martin v. Löwis737ea822004-06-08 18:52:54 +0000193
194 c = copy;
Christian Heimesfaf2f632008-01-06 16:59:19 +0000195 memcpy(c, digits_pos, decimal_point_pos - digits_pos);
196 c += decimal_point_pos - digits_pos;
Martin v. Löwis737ea822004-06-08 18:52:54 +0000197 memcpy(c, decimal_point, decimal_point_len);
198 c += decimal_point_len;
Christian Heimesb186d002008-03-18 15:15:01 +0000199 memcpy(c, decimal_point_pos + 1,
200 end - (decimal_point_pos + 1));
Martin v. Löwis737ea822004-06-08 18:52:54 +0000201 c += end - (decimal_point_pos + 1);
202 *c = 0;
203
204 val = strtod(copy, &fail_pos);
205
206 if (fail_pos)
207 {
208 if (fail_pos > decimal_point_pos)
Christian Heimesfaf2f632008-01-06 16:59:19 +0000209 fail_pos = (char *)digits_pos +
210 (fail_pos - copy) -
211 (decimal_point_len - 1);
Martin v. Löwis737ea822004-06-08 18:52:54 +0000212 else
Christian Heimesfaf2f632008-01-06 16:59:19 +0000213 fail_pos = (char *)digits_pos +
214 (fail_pos - copy);
Martin v. Löwis737ea822004-06-08 18:52:54 +0000215 }
216
Thomas Wouters477c8d52006-05-27 19:21:47 +0000217 PyMem_FREE(copy);
Martin v. Löwis737ea822004-06-08 18:52:54 +0000218
219 }
Neal Norwitze7214a12005-12-18 05:03:17 +0000220 else {
Christian Heimesfaf2f632008-01-06 16:59:19 +0000221 val = strtod(digits_pos, &fail_pos);
Neal Norwitze7214a12005-12-18 05:03:17 +0000222 }
Martin v. Löwis737ea822004-06-08 18:52:54 +0000223
Christian Heimesfaf2f632008-01-06 16:59:19 +0000224 if (fail_pos == digits_pos)
Mark Dickinson6d65df12009-04-26 15:30:47 +0000225 goto invalid_string;
Christian Heimesfaf2f632008-01-06 16:59:19 +0000226
Mark Dickinson6d65df12009-04-26 15:30:47 +0000227 got_val:
Christian Heimesfaf2f632008-01-06 16:59:19 +0000228 if (negate && fail_pos != nptr)
229 val = -val;
230
Martin v. Löwis737ea822004-06-08 18:52:54 +0000231 if (endptr)
232 *endptr = fail_pos;
233
234 return val;
Mark Dickinson6d65df12009-04-26 15:30:47 +0000235
236 invalid_string:
237 if (endptr)
238 *endptr = (char*)nptr;
239 errno = EINVAL;
240 return -1.0;
Martin v. Löwis737ea822004-06-08 18:52:54 +0000241}
242
Eric Smith0923d1d2009-04-16 20:16:10 +0000243#endif
244
/* Convert nptr to a double with PyOS_ascii_strtod, without reporting where
   parsing stopped (a NULL endptr is passed). */
double
PyOS_ascii_atof(const char *nptr)
{
    char **no_endptr = NULL;

    return PyOS_ascii_strtod(nptr, no_endptr);
}
250
251
Eric Smithb2c7af82008-04-30 02:12:09 +0000252/* Given a string that may have a decimal point in the current
253 locale, change it back to a dot. Since the string cannot get
254 longer, no need for a maximum buffer size parameter. */
255Py_LOCAL_INLINE(void)
256change_decimal_from_locale_to_dot(char* buffer)
257{
258 struct lconv *locale_data = localeconv();
259 const char *decimal_point = locale_data->decimal_point;
260
261 if (decimal_point[0] != '.' || decimal_point[1] != 0) {
262 size_t decimal_point_len = strlen(decimal_point);
263
264 if (*buffer == '+' || *buffer == '-')
265 buffer++;
Eric Smith6dc46f52009-04-27 20:39:49 +0000266 while (Py_ISDIGIT(*buffer))
Eric Smithb2c7af82008-04-30 02:12:09 +0000267 buffer++;
268 if (strncmp(buffer, decimal_point, decimal_point_len) == 0) {
269 *buffer = '.';
270 buffer++;
271 if (decimal_point_len > 1) {
272 /* buffer needs to get smaller */
273 size_t rest_len = strlen(buffer +
274 (decimal_point_len - 1));
275 memmove(buffer,
276 buffer + (decimal_point_len - 1),
277 rest_len);
278 buffer[rest_len] = 0;
279 }
280 }
281 }
282}
283
Martin v. Löwis737ea822004-06-08 18:52:54 +0000284
Christian Heimesc3f30c42008-02-22 16:37:40 +0000285/* From the C99 standard, section 7.19.6:
286The exponent always contains at least two digits, and only as many more digits
287as necessary to represent the exponent.
288*/
289#define MIN_EXPONENT_DIGITS 2
290
Eric Smithb2c7af82008-04-30 02:12:09 +0000291/* Ensure that any exponent, if present, is at least MIN_EXPONENT_DIGITS
292 in length. */
293Py_LOCAL_INLINE(void)
Mark Dickinsonce95e562009-04-26 20:02:24 +0000294ensure_minimum_exponent_length(char* buffer, size_t buf_size)
Eric Smithb2c7af82008-04-30 02:12:09 +0000295{
296 char *p = strpbrk(buffer, "eE");
297 if (p && (*(p + 1) == '-' || *(p + 1) == '+')) {
298 char *start = p + 2;
299 int exponent_digit_cnt = 0;
300 int leading_zero_cnt = 0;
301 int in_leading_zeros = 1;
302 int significant_digit_cnt;
303
304 /* Skip over the exponent and the sign. */
305 p += 2;
306
307 /* Find the end of the exponent, keeping track of leading
308 zeros. */
Eric Smith6dc46f52009-04-27 20:39:49 +0000309 while (*p && Py_ISDIGIT(*p)) {
Eric Smithb2c7af82008-04-30 02:12:09 +0000310 if (in_leading_zeros && *p == '0')
311 ++leading_zero_cnt;
312 if (*p != '0')
313 in_leading_zeros = 0;
314 ++p;
315 ++exponent_digit_cnt;
316 }
317
318 significant_digit_cnt = exponent_digit_cnt - leading_zero_cnt;
319 if (exponent_digit_cnt == MIN_EXPONENT_DIGITS) {
320 /* If there are 2 exactly digits, we're done,
321 regardless of what they contain */
322 }
323 else if (exponent_digit_cnt > MIN_EXPONENT_DIGITS) {
324 int extra_zeros_cnt;
325
326 /* There are more than 2 digits in the exponent. See
327 if we can delete some of the leading zeros */
328 if (significant_digit_cnt < MIN_EXPONENT_DIGITS)
329 significant_digit_cnt = MIN_EXPONENT_DIGITS;
330 extra_zeros_cnt = exponent_digit_cnt -
331 significant_digit_cnt;
332
333 /* Delete extra_zeros_cnt worth of characters from the
334 front of the exponent */
335 assert(extra_zeros_cnt >= 0);
336
337 /* Add one to significant_digit_cnt to copy the
338 trailing 0 byte, thus setting the length */
339 memmove(start,
340 start + extra_zeros_cnt,
341 significant_digit_cnt + 1);
342 }
343 else {
344 /* If there are fewer than 2 digits, add zeros
345 until there are 2, if there's enough room */
346 int zeros = MIN_EXPONENT_DIGITS - exponent_digit_cnt;
347 if (start + zeros + exponent_digit_cnt + 1
348 < buffer + buf_size) {
349 memmove(start + zeros, start,
350 exponent_digit_cnt + 1);
351 memset(start, '0', zeros);
352 }
353 }
354 }
355}
356
Eric Smith0923d1d2009-04-16 20:16:10 +0000357/* Ensure that buffer has a decimal point in it. The decimal point will not
358 be in the current locale, it will always be '.'. Don't add a decimal if an
359 exponent is present. */
Eric Smithb2c7af82008-04-30 02:12:09 +0000360Py_LOCAL_INLINE(void)
361ensure_decimal_point(char* buffer, size_t buf_size)
362{
363 int insert_count = 0;
364 char* chars_to_insert;
365
366 /* search for the first non-digit character */
367 char *p = buffer;
Eric Smith2ad79e82008-07-19 00:33:23 +0000368 if (*p == '-' || *p == '+')
369 /* Skip leading sign, if present. I think this could only
370 ever be '-', but it can't hurt to check for both. */
371 ++p;
Eric Smith6dc46f52009-04-27 20:39:49 +0000372 while (*p && Py_ISDIGIT(*p))
Eric Smithb2c7af82008-04-30 02:12:09 +0000373 ++p;
374
375 if (*p == '.') {
Eric Smith6dc46f52009-04-27 20:39:49 +0000376 if (Py_ISDIGIT(*(p+1))) {
Eric Smithb2c7af82008-04-30 02:12:09 +0000377 /* Nothing to do, we already have a decimal
378 point and a digit after it */
379 }
380 else {
381 /* We have a decimal point, but no following
382 digit. Insert a zero after the decimal. */
383 ++p;
384 chars_to_insert = "0";
385 insert_count = 1;
386 }
387 }
Eric Smith0923d1d2009-04-16 20:16:10 +0000388 else if (!(*p == 'e' || *p == 'E')) {
389 /* Don't add ".0" if we have an exponent. */
Eric Smithb2c7af82008-04-30 02:12:09 +0000390 chars_to_insert = ".0";
391 insert_count = 2;
392 }
393 if (insert_count) {
394 size_t buf_len = strlen(buffer);
395 if (buf_len + insert_count + 1 >= buf_size) {
396 /* If there is not enough room in the buffer
397 for the additional text, just skip it. It's
398 not worth generating an error over. */
399 }
400 else {
401 memmove(p + insert_count, p,
402 buffer + strlen(buffer) - p + 1);
403 memcpy(p, chars_to_insert, insert_count);
404 }
405 }
406}
407
Christian Heimesc3f30c42008-02-22 16:37:40 +0000408/* see FORMATBUFLEN in unicodeobject.c */
409#define FLOAT_FORMATBUFLEN 120
410
Martin v. Löwis737ea822004-06-08 18:52:54 +0000411/**
412 * PyOS_ascii_formatd:
413 * @buffer: A buffer to place the resulting string in
Christian Heimesb186d002008-03-18 15:15:01 +0000414 * @buf_size: The length of the buffer.
Martin v. Löwis737ea822004-06-08 18:52:54 +0000415 * @format: The printf()-style format to use for the
416 * code to use for converting.
417 * @d: The #gdouble to convert
418 *
419 * Converts a #gdouble to a string, using the '.' as
420 * decimal point. To format the number you pass in
421 * a printf()-style format string. Allowed conversion
Eric Smith0923d1d2009-04-16 20:16:10 +0000422 * specifiers are 'e', 'E', 'f', 'F', 'g', 'G', and 'Z'.
Martin v. Löwis737ea822004-06-08 18:52:54 +0000423 *
Christian Heimesb186d002008-03-18 15:15:01 +0000424 * 'Z' is the same as 'g', except it always has a decimal and
425 * at least one digit after the decimal.
Christian Heimesc3f30c42008-02-22 16:37:40 +0000426 *
Martin v. Löwis737ea822004-06-08 18:52:54 +0000427 * Return value: The pointer to the buffer with the converted string.
428 **/
429char *
Eric Smithcc32a112009-04-26 21:35:14 +0000430_PyOS_ascii_formatd(char *buffer,
Christian Heimesb186d002008-03-18 15:15:01 +0000431 size_t buf_size,
Martin v. Löwis737ea822004-06-08 18:52:54 +0000432 const char *format,
433 double d)
434{
Martin v. Löwis737ea822004-06-08 18:52:54 +0000435 char format_char;
Christian Heimesc3f30c42008-02-22 16:37:40 +0000436 size_t format_len = strlen(format);
437
Christian Heimesb186d002008-03-18 15:15:01 +0000438 /* Issue 2264: code 'Z' requires copying the format. 'Z' is 'g', but
439 also with at least one character past the decimal. */
Christian Heimesc3f30c42008-02-22 16:37:40 +0000440 char tmp_format[FLOAT_FORMATBUFLEN];
Martin v. Löwis737ea822004-06-08 18:52:54 +0000441
Christian Heimesc3f30c42008-02-22 16:37:40 +0000442 /* The last character in the format string must be the format char */
443 format_char = format[format_len - 1];
Martin v. Löwis737ea822004-06-08 18:52:54 +0000444
Martin v. Löwis737ea822004-06-08 18:52:54 +0000445 if (format[0] != '%')
446 return NULL;
447
Christian Heimesc3f30c42008-02-22 16:37:40 +0000448 /* I'm not sure why this test is here. It's ensuring that the format
449 string after the first character doesn't have a single quote, a
450 lowercase l, or a percent. This is the reverse of the commented-out
451 test about 10 lines ago. */
Martin v. Löwis737ea822004-06-08 18:52:54 +0000452 if (strpbrk(format + 1, "'l%"))
453 return NULL;
454
Christian Heimesb186d002008-03-18 15:15:01 +0000455 /* Also curious about this function is that it accepts format strings
456 like "%xg", which are invalid for floats. In general, the
457 interface to this function is not very good, but changing it is
458 difficult because it's a public API. */
459
Martin v. Löwis737ea822004-06-08 18:52:54 +0000460 if (!(format_char == 'e' || format_char == 'E' ||
461 format_char == 'f' || format_char == 'F' ||
Christian Heimesc3f30c42008-02-22 16:37:40 +0000462 format_char == 'g' || format_char == 'G' ||
Eric Smith0923d1d2009-04-16 20:16:10 +0000463 format_char == 'Z'))
Martin v. Löwis737ea822004-06-08 18:52:54 +0000464 return NULL;
465
Eric Smith0923d1d2009-04-16 20:16:10 +0000466 /* Map 'Z' format_char to 'g', by copying the format string and
Christian Heimesb186d002008-03-18 15:15:01 +0000467 replacing the final char with a 'g' */
Eric Smith0923d1d2009-04-16 20:16:10 +0000468 if (format_char == 'Z') {
Christian Heimesc3f30c42008-02-22 16:37:40 +0000469 if (format_len + 1 >= sizeof(tmp_format)) {
470 /* The format won't fit in our copy. Error out. In
Christian Heimesb186d002008-03-18 15:15:01 +0000471 practice, this will never happen and will be
472 detected by returning NULL */
Christian Heimesc3f30c42008-02-22 16:37:40 +0000473 return NULL;
474 }
475 strcpy(tmp_format, format);
476 tmp_format[format_len - 1] = 'g';
477 format = tmp_format;
478 }
Martin v. Löwis737ea822004-06-08 18:52:54 +0000479
Christian Heimesb186d002008-03-18 15:15:01 +0000480
Christian Heimesc3f30c42008-02-22 16:37:40 +0000481 /* Have PyOS_snprintf do the hard work */
Christian Heimesb186d002008-03-18 15:15:01 +0000482 PyOS_snprintf(buffer, buf_size, format, d);
Martin v. Löwis737ea822004-06-08 18:52:54 +0000483
Eric Smithb2c7af82008-04-30 02:12:09 +0000484 /* Do various fixups on the return string */
Martin v. Löwis737ea822004-06-08 18:52:54 +0000485
Eric Smithb2c7af82008-04-30 02:12:09 +0000486 /* Get the current locale, and find the decimal point string.
Eric Smith0923d1d2009-04-16 20:16:10 +0000487 Convert that string back to a dot. */
488 change_decimal_from_locale_to_dot(buffer);
Christian Heimesc3f30c42008-02-22 16:37:40 +0000489
490 /* If an exponent exists, ensure that the exponent is at least
491 MIN_EXPONENT_DIGITS digits, providing the buffer is large enough
492 for the extra zeros. Also, if there are more than
493 MIN_EXPONENT_DIGITS, remove as many zeros as possible until we get
494 back to MIN_EXPONENT_DIGITS */
Mark Dickinsonce95e562009-04-26 20:02:24 +0000495 ensure_minimum_exponent_length(buffer, buf_size);
Martin v. Löwis737ea822004-06-08 18:52:54 +0000496
Christian Heimesb186d002008-03-18 15:15:01 +0000497 /* If format_char is 'Z', make sure we have at least one character
498 after the decimal point (and make sure we have a decimal point). */
Eric Smithb2c7af82008-04-30 02:12:09 +0000499 if (format_char == 'Z')
500 ensure_decimal_point(buffer, buf_size);
Christian Heimesb186d002008-03-18 15:15:01 +0000501
Martin v. Löwis737ea822004-06-08 18:52:54 +0000502 return buffer;
503}
504
Eric Smithcc32a112009-04-26 21:35:14 +0000505char *
506PyOS_ascii_formatd(char *buffer,
507 size_t buf_size,
508 const char *format,
509 double d)
510{
511 if (PyErr_WarnEx(PyExc_DeprecationWarning,
512 "PyOS_ascii_formatd is deprecated, "
513 "use PyOS_double_to_string instead", 1) < 0)
514 return NULL;
515
516 return _PyOS_ascii_formatd(buffer, buf_size, format, d);
517}
518
Eric Smith0923d1d2009-04-16 20:16:10 +0000519#ifdef PY_NO_SHORT_FLOAT_REPR
520
521/* The fallback code to use if _Py_dg_dtoa is not available. */
522
Mark Dickinson3370cce2009-04-17 22:40:53 +0000523/* Remove trailing zeros after the decimal point from a numeric string; also
524 remove the decimal point if all digits following it are zero. The numeric
525 string must end in '\0', and should not have any leading or trailing
526 whitespace. Assumes that the decimal point is '.'. */
527Py_LOCAL_INLINE(void)
528remove_trailing_zeros(char *buffer)
529{
530 char *old_fraction_end, *new_fraction_end, *end, *p;
531
532 p = buffer;
533 if (*p == '-' || *p == '+')
534 /* Skip leading sign, if present */
535 ++p;
536 while (isdigit(Py_CHARMASK(*p)))
537 ++p;
538
539 /* if there's no decimal point there's nothing to do */
540 if (*p++ != '.')
541 return;
542
543 /* scan any digits after the point */
544 while (isdigit(Py_CHARMASK(*p)))
545 ++p;
546 old_fraction_end = p;
547
548 /* scan up to ending '\0' */
549 while (*p != '\0')
550 p++;
551 /* +1 to make sure that we move the null byte as well */
552 end = p+1;
553
554 /* scan back from fraction_end, looking for removable zeros */
555 p = old_fraction_end;
556 while (*(p-1) == '0')
557 --p;
558 /* and remove point if we've got that far */
559 if (*(p-1) == '.')
560 --p;
561 new_fraction_end = p;
562
563 memmove(new_fraction_end, old_fraction_end, end-old_fraction_end);
564}
565
566
Eric Smith0923d1d2009-04-16 20:16:10 +0000567PyAPI_FUNC(char *) PyOS_double_to_string(double val,
568 char format_code,
569 int precision,
570 int flags,
571 int *type)
Martin v. Löwis737ea822004-06-08 18:52:54 +0000572{
Eric Smith0923d1d2009-04-16 20:16:10 +0000573 char buf[128];
574 char format[32];
575 Py_ssize_t len;
576 char *result;
577 char *p;
578 int t;
579 int upper = 0;
Mark Dickinson3370cce2009-04-17 22:40:53 +0000580 int strip_trailing_zeros = 0;
Eric Smith0923d1d2009-04-16 20:16:10 +0000581
582 /* Validate format_code, and map upper and lower case */
583 switch (format_code) {
584 case 'e': /* exponent */
585 case 'f': /* fixed */
586 case 'g': /* general */
587 break;
588 case 'E':
589 upper = 1;
590 format_code = 'e';
591 break;
592 case 'F':
593 upper = 1;
594 format_code = 'f';
595 break;
596 case 'G':
597 upper = 1;
598 format_code = 'g';
599 break;
600 case 'r': /* repr format */
601 /* Supplied precision is unused, must be 0. */
602 if (precision != 0) {
603 PyErr_BadInternalCall();
604 return NULL;
605 }
606 precision = 17;
607 format_code = 'g';
608 break;
609 case 's': /* str format */
610 /* Supplied precision is unused, must be 0. */
611 if (precision != 0) {
612 PyErr_BadInternalCall();
613 return NULL;
614 }
Mark Dickinson3370cce2009-04-17 22:40:53 +0000615 /* switch to exponential notation at 1e11, or 1e12 if we're
616 not adding a .0 */
617 if (fabs(val) >= (flags & Py_DTSF_ADD_DOT_0 ? 1e11 : 1e12)) {
618 precision = 11;
619 format_code = 'e';
620 strip_trailing_zeros = 1;
621 }
622 else {
623 precision = 12;
624 format_code = 'g';
625 }
Eric Smith0923d1d2009-04-16 20:16:10 +0000626 break;
627 default:
628 PyErr_BadInternalCall();
629 return NULL;
630 }
631
632 /* Handle nan and inf. */
633 if (Py_IS_NAN(val)) {
634 strcpy(buf, "nan");
635 t = Py_DTST_NAN;
636 } else if (Py_IS_INFINITY(val)) {
637 if (copysign(1., val) == 1.)
638 strcpy(buf, "inf");
639 else
640 strcpy(buf, "-inf");
641 t = Py_DTST_INFINITE;
642 } else {
643 t = Py_DTST_FINITE;
644
645
Mark Dickinson3370cce2009-04-17 22:40:53 +0000646 if ((flags & Py_DTSF_ADD_DOT_0) && (format_code != 'e'))
Eric Smith0923d1d2009-04-16 20:16:10 +0000647 format_code = 'Z';
648
Eric Smithcc32a112009-04-26 21:35:14 +0000649 PyOS_snprintf(format, sizeof(format), "%%%s.%i%c",
650 (flags & Py_DTSF_ALT ? "#" : ""), precision,
651 format_code);
652 _PyOS_ascii_formatd(buf, sizeof(buf), format, val);
Mark Dickinson3370cce2009-04-17 22:40:53 +0000653 /* remove trailing zeros if necessary */
654 if (strip_trailing_zeros)
655 remove_trailing_zeros(buf);
Eric Smith0923d1d2009-04-16 20:16:10 +0000656 }
657
658 len = strlen(buf);
659
660 /* Add 1 for the trailing 0 byte.
661 Add 1 because we might need to make room for the sign.
662 */
663 result = PyMem_Malloc(len + 2);
664 if (result == NULL) {
665 PyErr_NoMemory();
666 return NULL;
667 }
668 p = result;
669
Mark Dickinsonad476da2009-04-23 19:14:16 +0000670 /* Add sign when requested. It's convenient (esp. when formatting
671 complex numbers) to include a sign even for inf and nan. */
672 if (flags & Py_DTSF_SIGN && buf[0] != '-')
Eric Smith0923d1d2009-04-16 20:16:10 +0000673 *p++ = '+';
674
675 strcpy(p, buf);
676
677 if (upper) {
678 /* Convert to upper case. */
679 char *p1;
680 for (p1 = p; *p1; p1++)
681 *p1 = toupper(*p1);
682 }
683
684 if (type)
685 *type = t;
686 return result;
Martin v. Löwis737ea822004-06-08 18:52:54 +0000687}
Eric Smith0923d1d2009-04-16 20:16:10 +0000688
689#else
690
691/* _Py_dg_dtoa is available. */
692
/* The textual pieces of a formatted float ("inf", "nan" and the exponent
   marker) are kept in two parallel tables, one lower case and one upper
   case, so that no locale-dependent case conversion is ever needed.  The
   OFS_* constants index into either table. */
#define OFS_INF 0
#define OFS_NAN 1
#define OFS_E 2

/* The lengths of these are known to the code below, so don't change them */
static char *lc_float_strings[] = { "inf", "nan", "e" };
static char *uc_float_strings[] = { "INF", "NAN", "E" };
710
711
/* Convert a double d to a string, and return a PyMem_Malloc'd block of
   memory containing the resulting string.
714
715 Arguments:
716 d is the double to be converted
717 format_code is one of 'e', 'f', 'g', 'r' or 's'. 'e', 'f' and 'g'
718 correspond to '%e', '%f' and '%g'; 'r' and 's' correspond
719 to repr and str.
720 mode is one of '0', '2' or '3', and is completely determined by
721 format_code: 'e', 'g' and 's' use mode 2; 'f' mode 3, 'r' mode 0.
722 precision is the desired precision
723 always_add_sign is nonzero if a '+' sign should be included for positive
724 numbers
725 add_dot_0_if_integer is nonzero if integers in non-exponential form
726 should have ".0" added. Only applies to format codes 'r', 's', and 'g'.
727 use_alt_formatting is nonzero if alternative formatting should be
728 used. Only applies to format codes 'e', 'f' and 'g'.
729 type, if non-NULL, will be set to one of these constants to identify
730 the type of the 'd' argument:
731 Py_DTST_FINITE
732 Py_DTST_INFINITE
733 Py_DTST_NAN
734
735 Returns a PyMem_Malloc'd block of memory containing the resulting string,
736 or NULL on error. If NULL is returned, the Python error has been set.
737 */
738
739static char *
740format_float_short(double d, char format_code,
741 int mode, Py_ssize_t precision,
742 int always_add_sign, int add_dot_0_if_integer,
743 int use_alt_formatting, char **float_strings, int *type)
744{
745 char *buf = NULL;
746 char *p = NULL;
747 Py_ssize_t bufsize = 0;
748 char *digits, *digits_end;
749 int decpt_as_int, sign, exp_len, exp = 0, use_exp = 0;
750 Py_ssize_t decpt, digits_len, vdigits_start, vdigits_end;
751 _Py_SET_53BIT_PRECISION_HEADER;
752
753 /* _Py_dg_dtoa returns a digit string (no decimal point or exponent).
754 Must be matched by a call to _Py_dg_freedtoa. */
755 _Py_SET_53BIT_PRECISION_START;
756 digits = _Py_dg_dtoa(d, mode, precision, &decpt_as_int, &sign,
757 &digits_end);
758 _Py_SET_53BIT_PRECISION_END;
759
760 decpt = (Py_ssize_t)decpt_as_int;
761 if (digits == NULL) {
762 /* The only failure mode is no memory. */
763 PyErr_NoMemory();
764 goto exit;
765 }
766 assert(digits_end != NULL && digits_end >= digits);
767 digits_len = digits_end - digits;
768
Mark Dickinson3370cce2009-04-17 22:40:53 +0000769 if (digits_len && !isdigit(Py_CHARMASK(digits[0]))) {
Eric Smith0923d1d2009-04-16 20:16:10 +0000770 /* Infinities and nans here; adapt Gay's output,
771 so convert Infinity to inf and NaN to nan, and
772 ignore sign of nan. Then return. */
773
Mark Dickinsonad476da2009-04-23 19:14:16 +0000774 /* ignore the actual sign of a nan */
775 if (digits[0] == 'n' || digits[0] == 'N')
776 sign = 0;
777
Eric Smith0923d1d2009-04-16 20:16:10 +0000778 /* We only need 5 bytes to hold the result "+inf\0" . */
779 bufsize = 5; /* Used later in an assert. */
780 buf = (char *)PyMem_Malloc(bufsize);
781 if (buf == NULL) {
782 PyErr_NoMemory();
783 goto exit;
784 }
785 p = buf;
786
Mark Dickinsonad476da2009-04-23 19:14:16 +0000787 if (sign == 1) {
788 *p++ = '-';
789 }
790 else if (always_add_sign) {
791 *p++ = '+';
792 }
Eric Smith0923d1d2009-04-16 20:16:10 +0000793 if (digits[0] == 'i' || digits[0] == 'I') {
Eric Smith0923d1d2009-04-16 20:16:10 +0000794 strncpy(p, float_strings[OFS_INF], 3);
795 p += 3;
796
797 if (type)
798 *type = Py_DTST_INFINITE;
799 }
800 else if (digits[0] == 'n' || digits[0] == 'N') {
Eric Smith0923d1d2009-04-16 20:16:10 +0000801 strncpy(p, float_strings[OFS_NAN], 3);
802 p += 3;
803
804 if (type)
805 *type = Py_DTST_NAN;
806 }
807 else {
808 /* shouldn't get here: Gay's code should always return
809 something starting with a digit, an 'I', or 'N' */
810 strncpy(p, "ERR", 3);
811 p += 3;
812 assert(0);
813 }
814 goto exit;
815 }
816
817 /* The result must be finite (not inf or nan). */
818 if (type)
819 *type = Py_DTST_FINITE;
820
821
822 /* We got digits back, format them. We may need to pad 'digits'
823 either on the left or right (or both) with extra zeros, so in
824 general the resulting string has the form
825
826 [<sign>]<zeros><digits><zeros>[<exponent>]
827
828 where either of the <zeros> pieces could be empty, and there's a
829 decimal point that could appear either in <digits> or in the
830 leading or trailing <zeros>.
831
832 Imagine an infinite 'virtual' string vdigits, consisting of the
833 string 'digits' (starting at index 0) padded on both the left and
834 right with infinite strings of zeros. We want to output a slice
835
836 vdigits[vdigits_start : vdigits_end]
837
838 of this virtual string. Thus if vdigits_start < 0 then we'll end
839 up producing some leading zeros; if vdigits_end > digits_len there
840 will be trailing zeros in the output. The next section of code
841 determines whether to use an exponent or not, figures out the
842 position 'decpt' of the decimal point, and computes 'vdigits_start'
843 and 'vdigits_end'. */
844 vdigits_end = digits_len;
845 switch (format_code) {
846 case 'e':
847 use_exp = 1;
848 vdigits_end = precision;
849 break;
850 case 'f':
851 vdigits_end = decpt + precision;
852 break;
853 case 'g':
854 if (decpt <= -4 || decpt > precision)
855 use_exp = 1;
856 if (use_alt_formatting)
857 vdigits_end = precision;
858 break;
859 case 'r':
860 /* convert to exponential format at 1e16. We used to convert
861 at 1e17, but that gives odd-looking results for some values
862 when a 16-digit 'shortest' repr is padded with bogus zeros.
863 For example, repr(2e16+8) would give 20000000000000010.0;
864 the true value is 20000000000000008.0. */
865 if (decpt <= -4 || decpt > 16)
866 use_exp = 1;
867 break;
868 case 's':
869 /* if we're forcing a digit after the point, convert to
870 exponential format at 1e11. If not, convert at 1e12. */
871 if (decpt <= -4 || decpt >
872 (add_dot_0_if_integer ? precision-1 : precision))
873 use_exp = 1;
874 break;
875 default:
876 PyErr_BadInternalCall();
877 goto exit;
878 }
879
880 /* if using an exponent, reset decimal point position to 1 and adjust
881 exponent accordingly.*/
882 if (use_exp) {
883 exp = decpt - 1;
884 decpt = 1;
885 }
886 /* ensure vdigits_start < decpt <= vdigits_end, or vdigits_start <
887 decpt < vdigits_end if add_dot_0_if_integer and no exponent */
888 vdigits_start = decpt <= 0 ? decpt-1 : 0;
889 if (!use_exp && add_dot_0_if_integer)
890 vdigits_end = vdigits_end > decpt ? vdigits_end : decpt + 1;
891 else
892 vdigits_end = vdigits_end > decpt ? vdigits_end : decpt;
893
894 /* double check inequalities */
895 assert(vdigits_start <= 0 &&
896 0 <= digits_len &&
897 digits_len <= vdigits_end);
898 /* decimal point should be in (vdigits_start, vdigits_end] */
899 assert(vdigits_start < decpt && decpt <= vdigits_end);
900
901 /* Compute an upper bound how much memory we need. This might be a few
902 chars too long, but no big deal. */
903 bufsize =
904 /* sign, decimal point and trailing 0 byte */
905 3 +
906
907 /* total digit count (including zero padding on both sides) */
908 (vdigits_end - vdigits_start) +
909
910 /* exponent "e+100", max 3 numerical digits */
911 (use_exp ? 5 : 0);
912
913 /* Now allocate the memory and initialize p to point to the start of
914 it. */
915 buf = (char *)PyMem_Malloc(bufsize);
916 if (buf == NULL) {
917 PyErr_NoMemory();
918 goto exit;
919 }
920 p = buf;
921
922 /* Add a negative sign if negative, and a plus sign if non-negative
923 and always_add_sign is true. */
924 if (sign == 1)
925 *p++ = '-';
926 else if (always_add_sign)
927 *p++ = '+';
928
929 /* note that exactly one of the three 'if' conditions is true,
930 so we include exactly one decimal point */
931 /* Zero padding on left of digit string */
932 if (decpt <= 0) {
933 memset(p, '0', decpt-vdigits_start);
934 p += decpt - vdigits_start;
935 *p++ = '.';
936 memset(p, '0', 0-decpt);
937 p += 0-decpt;
938 }
939 else {
940 memset(p, '0', 0-vdigits_start);
941 p += 0 - vdigits_start;
942 }
943
944 /* Digits, with included decimal point */
945 if (0 < decpt && decpt <= digits_len) {
946 strncpy(p, digits, decpt-0);
947 p += decpt-0;
948 *p++ = '.';
949 strncpy(p, digits+decpt, digits_len-decpt);
950 p += digits_len-decpt;
951 }
952 else {
953 strncpy(p, digits, digits_len);
954 p += digits_len;
955 }
956
957 /* And zeros on the right */
958 if (digits_len < decpt) {
959 memset(p, '0', decpt-digits_len);
960 p += decpt-digits_len;
961 *p++ = '.';
962 memset(p, '0', vdigits_end-decpt);
963 p += vdigits_end-decpt;
964 }
965 else {
966 memset(p, '0', vdigits_end-digits_len);
967 p += vdigits_end-digits_len;
968 }
969
970 /* Delete a trailing decimal pt unless using alternative formatting. */
971 if (p[-1] == '.' && !use_alt_formatting)
972 p--;
973
974 /* Now that we've done zero padding, add an exponent if needed. */
975 if (use_exp) {
976 *p++ = float_strings[OFS_E][0];
977 exp_len = sprintf(p, "%+.02d", exp);
978 p += exp_len;
979 }
980 exit:
981 if (buf) {
982 *p = '\0';
983 /* It's too late if this fails, as we've already stepped on
984 memory that isn't ours. But it's an okay debugging test. */
985 assert(p-buf < bufsize);
986 }
987 if (digits)
988 _Py_dg_freedtoa(digits);
989
990 return buf;
991}
992
993
994PyAPI_FUNC(char *) PyOS_double_to_string(double val,
Eric Smith193125a2009-04-16 22:08:31 +0000995 char format_code,
996 int precision,
997 int flags,
Eric Smith0923d1d2009-04-16 20:16:10 +0000998 int *type)
999{
Eric Smith193125a2009-04-16 22:08:31 +00001000 char **float_strings = lc_float_strings;
1001 int mode;
Eric Smith0923d1d2009-04-16 20:16:10 +00001002
Eric Smith193125a2009-04-16 22:08:31 +00001003 /* Validate format_code, and map upper and lower case. Compute the
1004 mode and make any adjustments as needed. */
Eric Smith0923d1d2009-04-16 20:16:10 +00001005 switch (format_code) {
Eric Smith193125a2009-04-16 22:08:31 +00001006 /* exponent */
Eric Smith0923d1d2009-04-16 20:16:10 +00001007 case 'E':
Eric Smith0923d1d2009-04-16 20:16:10 +00001008 float_strings = uc_float_strings;
Eric Smith193125a2009-04-16 22:08:31 +00001009 format_code = 'e';
1010 /* Fall through. */
Eric Smith0923d1d2009-04-16 20:16:10 +00001011 case 'e':
1012 mode = 2;
1013 precision++;
1014 break;
Eric Smith193125a2009-04-16 22:08:31 +00001015
1016 /* fixed */
1017 case 'F':
1018 float_strings = uc_float_strings;
1019 format_code = 'f';
1020 /* Fall through. */
Eric Smith0923d1d2009-04-16 20:16:10 +00001021 case 'f':
1022 mode = 3;
1023 break;
Eric Smith193125a2009-04-16 22:08:31 +00001024
1025 /* general */
1026 case 'G':
1027 float_strings = uc_float_strings;
1028 format_code = 'g';
1029 /* Fall through. */
Eric Smith0923d1d2009-04-16 20:16:10 +00001030 case 'g':
1031 mode = 2;
1032 /* precision 0 makes no sense for 'g' format; interpret as 1 */
1033 if (precision == 0)
1034 precision = 1;
1035 break;
Eric Smith193125a2009-04-16 22:08:31 +00001036
1037 /* repr format */
Eric Smith0923d1d2009-04-16 20:16:10 +00001038 case 'r':
Eric Smith0923d1d2009-04-16 20:16:10 +00001039 mode = 0;
1040 /* Supplied precision is unused, must be 0. */
1041 if (precision != 0) {
1042 PyErr_BadInternalCall();
1043 return NULL;
1044 }
1045 break;
Eric Smith193125a2009-04-16 22:08:31 +00001046
1047 /* str format */
Eric Smith0923d1d2009-04-16 20:16:10 +00001048 case 's':
1049 mode = 2;
1050 /* Supplied precision is unused, must be 0. */
1051 if (precision != 0) {
1052 PyErr_BadInternalCall();
1053 return NULL;
1054 }
1055 precision = 12;
1056 break;
Eric Smith193125a2009-04-16 22:08:31 +00001057
1058 default:
1059 PyErr_BadInternalCall();
1060 return NULL;
Eric Smith0923d1d2009-04-16 20:16:10 +00001061 }
1062
Eric Smith193125a2009-04-16 22:08:31 +00001063 return format_float_short(val, format_code, mode, precision,
Eric Smith0923d1d2009-04-16 20:16:10 +00001064 flags & Py_DTSF_SIGN,
1065 flags & Py_DTSF_ADD_DOT_0,
1066 flags & Py_DTSF_ALT,
1067 float_strings, type);
1068}
1069#endif /* ifdef PY_NO_SHORT_FLOAT_REPR */