blob: b5dd93d17a8dcb9449ebec18c7844f013852275c [file] [log] [blame]
Martin v. Löwis737ea822004-06-08 18:52:54 +00001/* -*- Mode: C; c-file-style: "python" -*- */
2
3#include <Python.h>
4#include <locale.h>
5
/* Locale-independent (pure ASCII) character classification.  These are
   deliberately not the <ctype.h> tests, whose answers may depend on the
   current locale.  Note that each macro evaluates its argument more than
   once, so the argument must not have side effects. */
#define ISSPACE(c)                                          \
    ((c) == ' '  || (c) == '\f' || (c) == '\n' ||           \
     (c) == '\r' || (c) == '\t' || (c) == '\v')
#define ISDIGIT(c)  ((c) >= '0' && (c) <= '9')
Martin v. Löwis737ea822004-06-08 18:52:54 +000010
11
/**
 * PyOS_ascii_strtod:
 * @nptr:    the string to convert to a numeric value.
 * @endptr:  if non-%NULL, it returns the character after
 *           the last character used in the conversion.
 *
 * Converts a string to a double value.
 * This function behaves like the standard strtod() function
 * does in the C locale. It does this without actually
 * changing the current locale, since that would not be
 * thread-safe.
 *
 * This function is typically used when reading configuration
 * files or other non-user input that should be locale independent.
 * To handle input from the user you should normally use the
 * locale-sensitive system strtod() function.
 *
 * If the correct value would cause overflow, plus or minus %HUGE_VAL
 * is returned (according to the sign of the value), and %ERANGE is
 * stored in %errno. If the correct value would cause underflow,
 * zero is returned and %ERANGE is stored in %errno.
 * If memory allocation fails, %ENOMEM is stored in %errno.
 *
 * This function resets %errno before calling strtod() so that
 * you can reliably detect overflow and underflow.
 *
 * Return value: the double value.
 **/
Eric Smith0923d1d2009-04-16 20:16:10 +000040
41#ifndef PY_NO_SHORT_FLOAT_REPR
42
43double
44PyOS_ascii_strtod(const char *nptr, char **endptr)
45{
46 double result;
47 _Py_SET_53BIT_PRECISION_HEADER;
48
49 assert(nptr != NULL);
50 /* Set errno to zero, so that we can distinguish zero results
51 and underflows */
52 errno = 0;
53
54 _Py_SET_53BIT_PRECISION_START;
55 result = _Py_dg_strtod(nptr, endptr);
56 _Py_SET_53BIT_PRECISION_END;
57
58 return result;
59
60}
61
62#else
63
64/*
65 Use system strtod; since strtod is locale aware, we may
66 have to first fix the decimal separator.
67
68 Note that unlike _Py_dg_strtod, the system strtod may not always give
69 correctly rounded results.
70*/
71
Martin v. Löwis737ea822004-06-08 18:52:54 +000072double
Neal Norwitze7214a12005-12-18 05:03:17 +000073PyOS_ascii_strtod(const char *nptr, char **endptr)
Martin v. Löwis737ea822004-06-08 18:52:54 +000074{
75 char *fail_pos;
Neal Norwitz0e7a0ed2005-12-18 05:37:36 +000076 double val = -1.0;
Martin v. Löwis737ea822004-06-08 18:52:54 +000077 struct lconv *locale_data;
78 const char *decimal_point;
Neal Norwitzd39d8612006-01-08 01:03:36 +000079 size_t decimal_point_len;
Martin v. Löwis737ea822004-06-08 18:52:54 +000080 const char *p, *decimal_point_pos;
81 const char *end = NULL; /* Silence gcc */
Christian Heimesfaf2f632008-01-06 16:59:19 +000082 const char *digits_pos = NULL;
83 int negate = 0;
Martin v. Löwis737ea822004-06-08 18:52:54 +000084
Martin v. Löwis737ea822004-06-08 18:52:54 +000085 assert(nptr != NULL);
86
87 fail_pos = NULL;
88
89 locale_data = localeconv();
90 decimal_point = locale_data->decimal_point;
91 decimal_point_len = strlen(decimal_point);
92
93 assert(decimal_point_len != 0);
94
95 decimal_point_pos = NULL;
Christian Heimesfaf2f632008-01-06 16:59:19 +000096
Mark Dickinson6d65df12009-04-26 15:30:47 +000097 /* Set errno to zero, so that we can distinguish zero results
98 and underflows */
99 errno = 0;
100
Christian Heimesfaf2f632008-01-06 16:59:19 +0000101 /* We process any leading whitespace and the optional sign manually,
102 then pass the remainder to the system strtod. This ensures that
103 the result of an underflow has the correct sign. (bug #1725) */
104
105 p = nptr;
106 /* Skip leading space */
107 while (ISSPACE(*p))
108 p++;
109
110 /* Process leading sign, if present */
111 if (*p == '-') {
112 negate = 1;
113 p++;
Mark Dickinson6d65df12009-04-26 15:30:47 +0000114 }
115 else if (*p == '+') {
Christian Heimesfaf2f632008-01-06 16:59:19 +0000116 p++;
117 }
118
Mark Dickinson6d65df12009-04-26 15:30:47 +0000119 /* Parse infinities and nans */
120 if (*p == 'i' || *p == 'I') {
121 if (PyOS_strnicmp(p, "inf", 3) == 0) {
122 val = Py_HUGE_VAL;
123 if (PyOS_strnicmp(p+3, "inity", 5) == 0)
124 fail_pos = (char *)p+8;
125 else
126 fail_pos = (char *)p+3;
127 goto got_val;
128 }
129 else
130 goto invalid_string;
Christian Heimesfaf2f632008-01-06 16:59:19 +0000131 }
Mark Dickinson6d65df12009-04-26 15:30:47 +0000132#ifdef Py_NAN
133 if (*p == 'n' || *p == 'N') {
134 if (PyOS_strnicmp(p, "nan", 3) == 0) {
135 val = Py_NAN;
136 fail_pos = (char *)p+3;
137 goto got_val;
138 }
139 else
140 goto invalid_string;
141 }
142#endif
Christian Heimesfaf2f632008-01-06 16:59:19 +0000143
Mark Dickinson6d65df12009-04-26 15:30:47 +0000144 /* Some platform strtods accept hex floats; Python shouldn't (at the
145 moment), so we check explicitly for strings starting with '0x'. */
146 if (*p == '0' && (*(p+1) == 'x' || *(p+1) == 'X'))
147 goto invalid_string;
148
149 /* Check that what's left begins with a digit or decimal point */
150 if (!ISDIGIT(*p) && *p != '.')
151 goto invalid_string;
152
153 digits_pos = p;
154 if (decimal_point[0] != '.' ||
Martin v. Löwis737ea822004-06-08 18:52:54 +0000155 decimal_point[1] != 0)
156 {
Mark Dickinson6d65df12009-04-26 15:30:47 +0000157 /* Look for a '.' in the input; if present, it'll need to be
158 swapped for the current locale's decimal point before we
159 call strtod. On the other hand, if we find the current
160 locale's decimal point then the input is invalid. */
Neal Norwitze7214a12005-12-18 05:03:17 +0000161 while (ISDIGIT(*p))
162 p++;
163
164 if (*p == '.')
Martin v. Löwis737ea822004-06-08 18:52:54 +0000165 {
Neal Norwitze7214a12005-12-18 05:03:17 +0000166 decimal_point_pos = p++;
Martin v. Löwis737ea822004-06-08 18:52:54 +0000167
Mark Dickinson6d65df12009-04-26 15:30:47 +0000168 /* locate end of number */
Martin v. Löwis737ea822004-06-08 18:52:54 +0000169 while (ISDIGIT(*p))
170 p++;
171
Neal Norwitze7214a12005-12-18 05:03:17 +0000172 if (*p == 'e' || *p == 'E')
173 p++;
174 if (*p == '+' || *p == '-')
175 p++;
176 while (ISDIGIT(*p))
177 p++;
178 end = p;
Martin v. Löwis737ea822004-06-08 18:52:54 +0000179 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000180 else if (strncmp(p, decimal_point, decimal_point_len) == 0)
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000181 /* Python bug #1417699 */
Mark Dickinson6d65df12009-04-26 15:30:47 +0000182 goto invalid_string;
Christian Heimesb186d002008-03-18 15:15:01 +0000183 /* For the other cases, we need not convert the decimal
184 point */
Martin v. Löwis737ea822004-06-08 18:52:54 +0000185 }
186
Mark Dickinson6d65df12009-04-26 15:30:47 +0000187 if (decimal_point_pos) {
Martin v. Löwis737ea822004-06-08 18:52:54 +0000188 char *copy, *c;
Mark Dickinson6d65df12009-04-26 15:30:47 +0000189 /* Create a copy of the input, with the '.' converted to the
190 locale-specific decimal point */
Christian Heimesfaf2f632008-01-06 16:59:19 +0000191 copy = (char *)PyMem_MALLOC(end - digits_pos +
192 1 + decimal_point_len);
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000193 if (copy == NULL) {
194 if (endptr)
195 *endptr = (char *)nptr;
196 errno = ENOMEM;
197 return val;
198 }
Martin v. Löwis737ea822004-06-08 18:52:54 +0000199
200 c = copy;
Christian Heimesfaf2f632008-01-06 16:59:19 +0000201 memcpy(c, digits_pos, decimal_point_pos - digits_pos);
202 c += decimal_point_pos - digits_pos;
Martin v. Löwis737ea822004-06-08 18:52:54 +0000203 memcpy(c, decimal_point, decimal_point_len);
204 c += decimal_point_len;
Christian Heimesb186d002008-03-18 15:15:01 +0000205 memcpy(c, decimal_point_pos + 1,
206 end - (decimal_point_pos + 1));
Martin v. Löwis737ea822004-06-08 18:52:54 +0000207 c += end - (decimal_point_pos + 1);
208 *c = 0;
209
210 val = strtod(copy, &fail_pos);
211
212 if (fail_pos)
213 {
214 if (fail_pos > decimal_point_pos)
Christian Heimesfaf2f632008-01-06 16:59:19 +0000215 fail_pos = (char *)digits_pos +
216 (fail_pos - copy) -
217 (decimal_point_len - 1);
Martin v. Löwis737ea822004-06-08 18:52:54 +0000218 else
Christian Heimesfaf2f632008-01-06 16:59:19 +0000219 fail_pos = (char *)digits_pos +
220 (fail_pos - copy);
Martin v. Löwis737ea822004-06-08 18:52:54 +0000221 }
222
Thomas Wouters477c8d52006-05-27 19:21:47 +0000223 PyMem_FREE(copy);
Martin v. Löwis737ea822004-06-08 18:52:54 +0000224
225 }
Neal Norwitze7214a12005-12-18 05:03:17 +0000226 else {
Christian Heimesfaf2f632008-01-06 16:59:19 +0000227 val = strtod(digits_pos, &fail_pos);
Neal Norwitze7214a12005-12-18 05:03:17 +0000228 }
Martin v. Löwis737ea822004-06-08 18:52:54 +0000229
Christian Heimesfaf2f632008-01-06 16:59:19 +0000230 if (fail_pos == digits_pos)
Mark Dickinson6d65df12009-04-26 15:30:47 +0000231 goto invalid_string;
Christian Heimesfaf2f632008-01-06 16:59:19 +0000232
Mark Dickinson6d65df12009-04-26 15:30:47 +0000233 got_val:
Christian Heimesfaf2f632008-01-06 16:59:19 +0000234 if (negate && fail_pos != nptr)
235 val = -val;
236
Martin v. Löwis737ea822004-06-08 18:52:54 +0000237 if (endptr)
238 *endptr = fail_pos;
239
240 return val;
Mark Dickinson6d65df12009-04-26 15:30:47 +0000241
242 invalid_string:
243 if (endptr)
244 *endptr = (char*)nptr;
245 errno = EINVAL;
246 return -1.0;
Martin v. Löwis737ea822004-06-08 18:52:54 +0000247}
248
Eric Smith0923d1d2009-04-16 20:16:10 +0000249#endif
250
/* Convenience wrapper: convert nptr with PyOS_ascii_strtod() without
   reporting where parsing stopped (a NULL end pointer is passed). */
double
PyOS_ascii_atof(const char *nptr)
{
    double value = PyOS_ascii_strtod(nptr, NULL);

    return value;
}
256
257
Eric Smithb2c7af82008-04-30 02:12:09 +0000258/* Given a string that may have a decimal point in the current
259 locale, change it back to a dot. Since the string cannot get
260 longer, no need for a maximum buffer size parameter. */
261Py_LOCAL_INLINE(void)
262change_decimal_from_locale_to_dot(char* buffer)
263{
264 struct lconv *locale_data = localeconv();
265 const char *decimal_point = locale_data->decimal_point;
266
267 if (decimal_point[0] != '.' || decimal_point[1] != 0) {
268 size_t decimal_point_len = strlen(decimal_point);
269
270 if (*buffer == '+' || *buffer == '-')
271 buffer++;
272 while (isdigit(Py_CHARMASK(*buffer)))
273 buffer++;
274 if (strncmp(buffer, decimal_point, decimal_point_len) == 0) {
275 *buffer = '.';
276 buffer++;
277 if (decimal_point_len > 1) {
278 /* buffer needs to get smaller */
279 size_t rest_len = strlen(buffer +
280 (decimal_point_len - 1));
281 memmove(buffer,
282 buffer + (decimal_point_len - 1),
283 rest_len);
284 buffer[rest_len] = 0;
285 }
286 }
287 }
288}
289
Martin v. Löwis737ea822004-06-08 18:52:54 +0000290
Christian Heimesc3f30c42008-02-22 16:37:40 +0000291/* From the C99 standard, section 7.19.6:
292The exponent always contains at least two digits, and only as many more digits
293as necessary to represent the exponent.
294*/
295#define MIN_EXPONENT_DIGITS 2
296
Eric Smithb2c7af82008-04-30 02:12:09 +0000297/* Ensure that any exponent, if present, is at least MIN_EXPONENT_DIGITS
298 in length. */
299Py_LOCAL_INLINE(void)
Mark Dickinsonce95e562009-04-26 20:02:24 +0000300ensure_minimum_exponent_length(char* buffer, size_t buf_size)
Eric Smithb2c7af82008-04-30 02:12:09 +0000301{
302 char *p = strpbrk(buffer, "eE");
303 if (p && (*(p + 1) == '-' || *(p + 1) == '+')) {
304 char *start = p + 2;
305 int exponent_digit_cnt = 0;
306 int leading_zero_cnt = 0;
307 int in_leading_zeros = 1;
308 int significant_digit_cnt;
309
310 /* Skip over the exponent and the sign. */
311 p += 2;
312
313 /* Find the end of the exponent, keeping track of leading
314 zeros. */
315 while (*p && isdigit(Py_CHARMASK(*p))) {
316 if (in_leading_zeros && *p == '0')
317 ++leading_zero_cnt;
318 if (*p != '0')
319 in_leading_zeros = 0;
320 ++p;
321 ++exponent_digit_cnt;
322 }
323
324 significant_digit_cnt = exponent_digit_cnt - leading_zero_cnt;
325 if (exponent_digit_cnt == MIN_EXPONENT_DIGITS) {
326 /* If there are 2 exactly digits, we're done,
327 regardless of what they contain */
328 }
329 else if (exponent_digit_cnt > MIN_EXPONENT_DIGITS) {
330 int extra_zeros_cnt;
331
332 /* There are more than 2 digits in the exponent. See
333 if we can delete some of the leading zeros */
334 if (significant_digit_cnt < MIN_EXPONENT_DIGITS)
335 significant_digit_cnt = MIN_EXPONENT_DIGITS;
336 extra_zeros_cnt = exponent_digit_cnt -
337 significant_digit_cnt;
338
339 /* Delete extra_zeros_cnt worth of characters from the
340 front of the exponent */
341 assert(extra_zeros_cnt >= 0);
342
343 /* Add one to significant_digit_cnt to copy the
344 trailing 0 byte, thus setting the length */
345 memmove(start,
346 start + extra_zeros_cnt,
347 significant_digit_cnt + 1);
348 }
349 else {
350 /* If there are fewer than 2 digits, add zeros
351 until there are 2, if there's enough room */
352 int zeros = MIN_EXPONENT_DIGITS - exponent_digit_cnt;
353 if (start + zeros + exponent_digit_cnt + 1
354 < buffer + buf_size) {
355 memmove(start + zeros, start,
356 exponent_digit_cnt + 1);
357 memset(start, '0', zeros);
358 }
359 }
360 }
361}
362
Eric Smith0923d1d2009-04-16 20:16:10 +0000363/* Ensure that buffer has a decimal point in it. The decimal point will not
364 be in the current locale, it will always be '.'. Don't add a decimal if an
365 exponent is present. */
Eric Smithb2c7af82008-04-30 02:12:09 +0000366Py_LOCAL_INLINE(void)
367ensure_decimal_point(char* buffer, size_t buf_size)
368{
369 int insert_count = 0;
370 char* chars_to_insert;
371
372 /* search for the first non-digit character */
373 char *p = buffer;
Eric Smith2ad79e82008-07-19 00:33:23 +0000374 if (*p == '-' || *p == '+')
375 /* Skip leading sign, if present. I think this could only
376 ever be '-', but it can't hurt to check for both. */
377 ++p;
Eric Smithb2c7af82008-04-30 02:12:09 +0000378 while (*p && isdigit(Py_CHARMASK(*p)))
379 ++p;
380
381 if (*p == '.') {
382 if (isdigit(Py_CHARMASK(*(p+1)))) {
383 /* Nothing to do, we already have a decimal
384 point and a digit after it */
385 }
386 else {
387 /* We have a decimal point, but no following
388 digit. Insert a zero after the decimal. */
389 ++p;
390 chars_to_insert = "0";
391 insert_count = 1;
392 }
393 }
Eric Smith0923d1d2009-04-16 20:16:10 +0000394 else if (!(*p == 'e' || *p == 'E')) {
395 /* Don't add ".0" if we have an exponent. */
Eric Smithb2c7af82008-04-30 02:12:09 +0000396 chars_to_insert = ".0";
397 insert_count = 2;
398 }
399 if (insert_count) {
400 size_t buf_len = strlen(buffer);
401 if (buf_len + insert_count + 1 >= buf_size) {
402 /* If there is not enough room in the buffer
403 for the additional text, just skip it. It's
404 not worth generating an error over. */
405 }
406 else {
407 memmove(p + insert_count, p,
408 buffer + strlen(buffer) - p + 1);
409 memcpy(p, chars_to_insert, insert_count);
410 }
411 }
412}
413
/* see FORMATBUFLEN in unicodeobject.c */
#define FLOAT_FORMATBUFLEN 120

/**
 * PyOS_ascii_formatd:
 * @buffer: A buffer to place the resulting string in
 * @buf_size: The length of the buffer.
 * @format: The printf()-style format to use for converting.
 * @d: The double to convert
 *
 * Converts a double to a string, using the '.' as
 * decimal point. To format the number you pass in
 * a printf()-style format string. Allowed conversion
 * specifiers are 'e', 'E', 'f', 'F', 'g', 'G', and 'Z'.
 *
 * 'Z' is the same as 'g', except it always has a decimal and
 * at least one digit after the decimal.
 *
 * The format must start with '%', must end with one of the conversion
 * specifiers above, and must not contain a single quote, a lowercase 'l'
 * or a second '%'.
 *
 * Return value: The pointer to the buffer with the converted string,
 * or %NULL (with the buffer untouched) if the format is rejected.
 **/
char *
PyOS_ascii_formatd(char       *buffer,
                   size_t      buf_size,
                   const char *format,
                   double      d)
{
    char format_char;
    size_t format_len = strlen(format);

    /* Issue 2264: code 'Z' requires copying the format.  'Z' is 'g', but
       also with at least one character past the decimal. */
    char tmp_format[FLOAT_FORMATBUFLEN];

    /* Validate before indexing: with an empty format there is no last
       character to look at, and format[format_len - 1] would read in
       front of the string. */
    if (format_len == 0 || format[0] != '%')
        return NULL;

    /* The last character in the format string must be the format char */
    format_char = format[format_len - 1];

    /* Reject a single quote, a lowercase l, or a further percent sign
       anywhere after the leading '%'. */
    if (strpbrk(format + 1, "'l%"))
        return NULL;

    /* Also curious about this function is that it accepts format strings
       like "%xg", which are invalid for floats.  In general, the
       interface to this function is not very good, but changing it is
       difficult because it's a public API. */

    if (!(format_char == 'e' || format_char == 'E' ||
          format_char == 'f' || format_char == 'F' ||
          format_char == 'g' || format_char == 'G' ||
          format_char == 'Z'))
        return NULL;

    /* Map 'Z' format_char to 'g', by copying the format string and
       replacing the final char with a 'g' */
    if (format_char == 'Z') {
        if (format_len + 1 >= sizeof(tmp_format)) {
            /* The format won't fit in our copy.  Error out.  In
               practice, this will never happen and will be
               detected by returning NULL */
            return NULL;
        }
        strcpy(tmp_format, format);
        tmp_format[format_len - 1] = 'g';
        format = tmp_format;
    }


    /* Have PyOS_snprintf do the hard work */
    PyOS_snprintf(buffer, buf_size, format, d);

    /* Do various fixups on the return string */

    /* Get the current locale, and find the decimal point string.
       Convert that string back to a dot. */
    change_decimal_from_locale_to_dot(buffer);

    /* If an exponent exists, ensure that the exponent is at least
       MIN_EXPONENT_DIGITS digits, providing the buffer is large enough
       for the extra zeros.  Also, if there are more than
       MIN_EXPONENT_DIGITS, remove as many zeros as possible until we get
       back to MIN_EXPONENT_DIGITS */
    ensure_minimum_exponent_length(buffer, buf_size);

    /* If format_char is 'Z', make sure we have at least one character
       after the decimal point (and make sure we have a decimal point). */
    if (format_char == 'Z')
        ensure_decimal_point(buffer, buf_size);

    return buffer;
}
510
Eric Smith0923d1d2009-04-16 20:16:10 +0000511#ifdef PY_NO_SHORT_FLOAT_REPR
512
513/* The fallback code to use if _Py_dg_dtoa is not available. */
514
Mark Dickinson3370cce2009-04-17 22:40:53 +0000515/* Remove trailing zeros after the decimal point from a numeric string; also
516 remove the decimal point if all digits following it are zero. The numeric
517 string must end in '\0', and should not have any leading or trailing
518 whitespace. Assumes that the decimal point is '.'. */
519Py_LOCAL_INLINE(void)
520remove_trailing_zeros(char *buffer)
521{
522 char *old_fraction_end, *new_fraction_end, *end, *p;
523
524 p = buffer;
525 if (*p == '-' || *p == '+')
526 /* Skip leading sign, if present */
527 ++p;
528 while (isdigit(Py_CHARMASK(*p)))
529 ++p;
530
531 /* if there's no decimal point there's nothing to do */
532 if (*p++ != '.')
533 return;
534
535 /* scan any digits after the point */
536 while (isdigit(Py_CHARMASK(*p)))
537 ++p;
538 old_fraction_end = p;
539
540 /* scan up to ending '\0' */
541 while (*p != '\0')
542 p++;
543 /* +1 to make sure that we move the null byte as well */
544 end = p+1;
545
546 /* scan back from fraction_end, looking for removable zeros */
547 p = old_fraction_end;
548 while (*(p-1) == '0')
549 --p;
550 /* and remove point if we've got that far */
551 if (*(p-1) == '.')
552 --p;
553 new_fraction_end = p;
554
555 memmove(new_fraction_end, old_fraction_end, end-old_fraction_end);
556}
557
558
Eric Smith0923d1d2009-04-16 20:16:10 +0000559PyAPI_FUNC(char *) PyOS_double_to_string(double val,
560 char format_code,
561 int precision,
562 int flags,
563 int *type)
Martin v. Löwis737ea822004-06-08 18:52:54 +0000564{
Eric Smith0923d1d2009-04-16 20:16:10 +0000565 char buf[128];
566 char format[32];
567 Py_ssize_t len;
568 char *result;
569 char *p;
570 int t;
571 int upper = 0;
Mark Dickinson3370cce2009-04-17 22:40:53 +0000572 int strip_trailing_zeros = 0;
Eric Smith0923d1d2009-04-16 20:16:10 +0000573
574 /* Validate format_code, and map upper and lower case */
575 switch (format_code) {
576 case 'e': /* exponent */
577 case 'f': /* fixed */
578 case 'g': /* general */
579 break;
580 case 'E':
581 upper = 1;
582 format_code = 'e';
583 break;
584 case 'F':
585 upper = 1;
586 format_code = 'f';
587 break;
588 case 'G':
589 upper = 1;
590 format_code = 'g';
591 break;
592 case 'r': /* repr format */
593 /* Supplied precision is unused, must be 0. */
594 if (precision != 0) {
595 PyErr_BadInternalCall();
596 return NULL;
597 }
598 precision = 17;
599 format_code = 'g';
600 break;
601 case 's': /* str format */
602 /* Supplied precision is unused, must be 0. */
603 if (precision != 0) {
604 PyErr_BadInternalCall();
605 return NULL;
606 }
Mark Dickinson3370cce2009-04-17 22:40:53 +0000607 /* switch to exponential notation at 1e11, or 1e12 if we're
608 not adding a .0 */
609 if (fabs(val) >= (flags & Py_DTSF_ADD_DOT_0 ? 1e11 : 1e12)) {
610 precision = 11;
611 format_code = 'e';
612 strip_trailing_zeros = 1;
613 }
614 else {
615 precision = 12;
616 format_code = 'g';
617 }
Eric Smith0923d1d2009-04-16 20:16:10 +0000618 break;
619 default:
620 PyErr_BadInternalCall();
621 return NULL;
622 }
623
624 /* Handle nan and inf. */
625 if (Py_IS_NAN(val)) {
626 strcpy(buf, "nan");
627 t = Py_DTST_NAN;
628 } else if (Py_IS_INFINITY(val)) {
629 if (copysign(1., val) == 1.)
630 strcpy(buf, "inf");
631 else
632 strcpy(buf, "-inf");
633 t = Py_DTST_INFINITE;
634 } else {
635 t = Py_DTST_FINITE;
636
637
Mark Dickinson3370cce2009-04-17 22:40:53 +0000638 if ((flags & Py_DTSF_ADD_DOT_0) && (format_code != 'e'))
Eric Smith0923d1d2009-04-16 20:16:10 +0000639 format_code = 'Z';
640
641 PyOS_snprintf(format, 32, "%%%s.%i%c", (flags & Py_DTSF_ALT ? "#" : ""), precision, format_code);
642 PyOS_ascii_formatd(buf, sizeof(buf), format, val);
Mark Dickinson3370cce2009-04-17 22:40:53 +0000643 /* remove trailing zeros if necessary */
644 if (strip_trailing_zeros)
645 remove_trailing_zeros(buf);
Eric Smith0923d1d2009-04-16 20:16:10 +0000646 }
647
648 len = strlen(buf);
649
650 /* Add 1 for the trailing 0 byte.
651 Add 1 because we might need to make room for the sign.
652 */
653 result = PyMem_Malloc(len + 2);
654 if (result == NULL) {
655 PyErr_NoMemory();
656 return NULL;
657 }
658 p = result;
659
Mark Dickinsonad476da2009-04-23 19:14:16 +0000660 /* Add sign when requested. It's convenient (esp. when formatting
661 complex numbers) to include a sign even for inf and nan. */
662 if (flags & Py_DTSF_SIGN && buf[0] != '-')
Eric Smith0923d1d2009-04-16 20:16:10 +0000663 *p++ = '+';
664
665 strcpy(p, buf);
666
667 if (upper) {
668 /* Convert to upper case. */
669 char *p1;
670 for (p1 = p; *p1; p1++)
671 *p1 = toupper(*p1);
672 }
673
674 if (type)
675 *type = t;
676 return result;
Martin v. Löwis737ea822004-06-08 18:52:54 +0000677}
Eric Smith0923d1d2009-04-16 20:16:10 +0000678
679#else
680
681/* _Py_dg_dtoa is available. */
682
/* Upper- and lower-case spellings are selected by table lookup, which
   avoids having to invent a locale-independent way of converting to
   upper case.  The OFS_* values index both tables below. */
#define OFS_INF  0
#define OFS_NAN  1
#define OFS_E    2

/* The lengths of these are known to the code below, so don't change them */
static char *lc_float_strings[] = { "inf", "nan", "e" };
static char *uc_float_strings[] = { "INF", "NAN", "E" };
700
701
702/* Convert a double d to a string, and return a PyMem_Malloc'd block of
703 memory contain the resulting string.
704
705 Arguments:
706 d is the double to be converted
707 format_code is one of 'e', 'f', 'g', 'r' or 's'. 'e', 'f' and 'g'
708 correspond to '%e', '%f' and '%g'; 'r' and 's' correspond
709 to repr and str.
710 mode is one of '0', '2' or '3', and is completely determined by
711 format_code: 'e', 'g' and 's' use mode 2; 'f' mode 3, 'r' mode 0.
712 precision is the desired precision
713 always_add_sign is nonzero if a '+' sign should be included for positive
714 numbers
715 add_dot_0_if_integer is nonzero if integers in non-exponential form
716 should have ".0" added. Only applies to format codes 'r', 's', and 'g'.
717 use_alt_formatting is nonzero if alternative formatting should be
718 used. Only applies to format codes 'e', 'f' and 'g'.
719 type, if non-NULL, will be set to one of these constants to identify
720 the type of the 'd' argument:
721 Py_DTST_FINITE
722 Py_DTST_INFINITE
723 Py_DTST_NAN
724
725 Returns a PyMem_Malloc'd block of memory containing the resulting string,
726 or NULL on error. If NULL is returned, the Python error has been set.
727 */
728
729static char *
730format_float_short(double d, char format_code,
731 int mode, Py_ssize_t precision,
732 int always_add_sign, int add_dot_0_if_integer,
733 int use_alt_formatting, char **float_strings, int *type)
734{
735 char *buf = NULL;
736 char *p = NULL;
737 Py_ssize_t bufsize = 0;
738 char *digits, *digits_end;
739 int decpt_as_int, sign, exp_len, exp = 0, use_exp = 0;
740 Py_ssize_t decpt, digits_len, vdigits_start, vdigits_end;
741 _Py_SET_53BIT_PRECISION_HEADER;
742
743 /* _Py_dg_dtoa returns a digit string (no decimal point or exponent).
744 Must be matched by a call to _Py_dg_freedtoa. */
745 _Py_SET_53BIT_PRECISION_START;
746 digits = _Py_dg_dtoa(d, mode, precision, &decpt_as_int, &sign,
747 &digits_end);
748 _Py_SET_53BIT_PRECISION_END;
749
750 decpt = (Py_ssize_t)decpt_as_int;
751 if (digits == NULL) {
752 /* The only failure mode is no memory. */
753 PyErr_NoMemory();
754 goto exit;
755 }
756 assert(digits_end != NULL && digits_end >= digits);
757 digits_len = digits_end - digits;
758
Mark Dickinson3370cce2009-04-17 22:40:53 +0000759 if (digits_len && !isdigit(Py_CHARMASK(digits[0]))) {
Eric Smith0923d1d2009-04-16 20:16:10 +0000760 /* Infinities and nans here; adapt Gay's output,
761 so convert Infinity to inf and NaN to nan, and
762 ignore sign of nan. Then return. */
763
Mark Dickinsonad476da2009-04-23 19:14:16 +0000764 /* ignore the actual sign of a nan */
765 if (digits[0] == 'n' || digits[0] == 'N')
766 sign = 0;
767
Eric Smith0923d1d2009-04-16 20:16:10 +0000768 /* We only need 5 bytes to hold the result "+inf\0" . */
769 bufsize = 5; /* Used later in an assert. */
770 buf = (char *)PyMem_Malloc(bufsize);
771 if (buf == NULL) {
772 PyErr_NoMemory();
773 goto exit;
774 }
775 p = buf;
776
Mark Dickinsonad476da2009-04-23 19:14:16 +0000777 if (sign == 1) {
778 *p++ = '-';
779 }
780 else if (always_add_sign) {
781 *p++ = '+';
782 }
Eric Smith0923d1d2009-04-16 20:16:10 +0000783 if (digits[0] == 'i' || digits[0] == 'I') {
Eric Smith0923d1d2009-04-16 20:16:10 +0000784 strncpy(p, float_strings[OFS_INF], 3);
785 p += 3;
786
787 if (type)
788 *type = Py_DTST_INFINITE;
789 }
790 else if (digits[0] == 'n' || digits[0] == 'N') {
Eric Smith0923d1d2009-04-16 20:16:10 +0000791 strncpy(p, float_strings[OFS_NAN], 3);
792 p += 3;
793
794 if (type)
795 *type = Py_DTST_NAN;
796 }
797 else {
798 /* shouldn't get here: Gay's code should always return
799 something starting with a digit, an 'I', or 'N' */
800 strncpy(p, "ERR", 3);
801 p += 3;
802 assert(0);
803 }
804 goto exit;
805 }
806
807 /* The result must be finite (not inf or nan). */
808 if (type)
809 *type = Py_DTST_FINITE;
810
811
812 /* We got digits back, format them. We may need to pad 'digits'
813 either on the left or right (or both) with extra zeros, so in
814 general the resulting string has the form
815
816 [<sign>]<zeros><digits><zeros>[<exponent>]
817
818 where either of the <zeros> pieces could be empty, and there's a
819 decimal point that could appear either in <digits> or in the
820 leading or trailing <zeros>.
821
822 Imagine an infinite 'virtual' string vdigits, consisting of the
823 string 'digits' (starting at index 0) padded on both the left and
824 right with infinite strings of zeros. We want to output a slice
825
826 vdigits[vdigits_start : vdigits_end]
827
828 of this virtual string. Thus if vdigits_start < 0 then we'll end
829 up producing some leading zeros; if vdigits_end > digits_len there
830 will be trailing zeros in the output. The next section of code
831 determines whether to use an exponent or not, figures out the
832 position 'decpt' of the decimal point, and computes 'vdigits_start'
833 and 'vdigits_end'. */
834 vdigits_end = digits_len;
835 switch (format_code) {
836 case 'e':
837 use_exp = 1;
838 vdigits_end = precision;
839 break;
840 case 'f':
841 vdigits_end = decpt + precision;
842 break;
843 case 'g':
844 if (decpt <= -4 || decpt > precision)
845 use_exp = 1;
846 if (use_alt_formatting)
847 vdigits_end = precision;
848 break;
849 case 'r':
850 /* convert to exponential format at 1e16. We used to convert
851 at 1e17, but that gives odd-looking results for some values
852 when a 16-digit 'shortest' repr is padded with bogus zeros.
853 For example, repr(2e16+8) would give 20000000000000010.0;
854 the true value is 20000000000000008.0. */
855 if (decpt <= -4 || decpt > 16)
856 use_exp = 1;
857 break;
858 case 's':
859 /* if we're forcing a digit after the point, convert to
860 exponential format at 1e11. If not, convert at 1e12. */
861 if (decpt <= -4 || decpt >
862 (add_dot_0_if_integer ? precision-1 : precision))
863 use_exp = 1;
864 break;
865 default:
866 PyErr_BadInternalCall();
867 goto exit;
868 }
869
870 /* if using an exponent, reset decimal point position to 1 and adjust
871 exponent accordingly.*/
872 if (use_exp) {
873 exp = decpt - 1;
874 decpt = 1;
875 }
876 /* ensure vdigits_start < decpt <= vdigits_end, or vdigits_start <
877 decpt < vdigits_end if add_dot_0_if_integer and no exponent */
878 vdigits_start = decpt <= 0 ? decpt-1 : 0;
879 if (!use_exp && add_dot_0_if_integer)
880 vdigits_end = vdigits_end > decpt ? vdigits_end : decpt + 1;
881 else
882 vdigits_end = vdigits_end > decpt ? vdigits_end : decpt;
883
884 /* double check inequalities */
885 assert(vdigits_start <= 0 &&
886 0 <= digits_len &&
887 digits_len <= vdigits_end);
888 /* decimal point should be in (vdigits_start, vdigits_end] */
889 assert(vdigits_start < decpt && decpt <= vdigits_end);
890
891 /* Compute an upper bound how much memory we need. This might be a few
892 chars too long, but no big deal. */
893 bufsize =
894 /* sign, decimal point and trailing 0 byte */
895 3 +
896
897 /* total digit count (including zero padding on both sides) */
898 (vdigits_end - vdigits_start) +
899
900 /* exponent "e+100", max 3 numerical digits */
901 (use_exp ? 5 : 0);
902
903 /* Now allocate the memory and initialize p to point to the start of
904 it. */
905 buf = (char *)PyMem_Malloc(bufsize);
906 if (buf == NULL) {
907 PyErr_NoMemory();
908 goto exit;
909 }
910 p = buf;
911
912 /* Add a negative sign if negative, and a plus sign if non-negative
913 and always_add_sign is true. */
914 if (sign == 1)
915 *p++ = '-';
916 else if (always_add_sign)
917 *p++ = '+';
918
919 /* note that exactly one of the three 'if' conditions is true,
920 so we include exactly one decimal point */
921 /* Zero padding on left of digit string */
922 if (decpt <= 0) {
923 memset(p, '0', decpt-vdigits_start);
924 p += decpt - vdigits_start;
925 *p++ = '.';
926 memset(p, '0', 0-decpt);
927 p += 0-decpt;
928 }
929 else {
930 memset(p, '0', 0-vdigits_start);
931 p += 0 - vdigits_start;
932 }
933
934 /* Digits, with included decimal point */
935 if (0 < decpt && decpt <= digits_len) {
936 strncpy(p, digits, decpt-0);
937 p += decpt-0;
938 *p++ = '.';
939 strncpy(p, digits+decpt, digits_len-decpt);
940 p += digits_len-decpt;
941 }
942 else {
943 strncpy(p, digits, digits_len);
944 p += digits_len;
945 }
946
947 /* And zeros on the right */
948 if (digits_len < decpt) {
949 memset(p, '0', decpt-digits_len);
950 p += decpt-digits_len;
951 *p++ = '.';
952 memset(p, '0', vdigits_end-decpt);
953 p += vdigits_end-decpt;
954 }
955 else {
956 memset(p, '0', vdigits_end-digits_len);
957 p += vdigits_end-digits_len;
958 }
959
960 /* Delete a trailing decimal pt unless using alternative formatting. */
961 if (p[-1] == '.' && !use_alt_formatting)
962 p--;
963
964 /* Now that we've done zero padding, add an exponent if needed. */
965 if (use_exp) {
966 *p++ = float_strings[OFS_E][0];
967 exp_len = sprintf(p, "%+.02d", exp);
968 p += exp_len;
969 }
970 exit:
971 if (buf) {
972 *p = '\0';
973 /* It's too late if this fails, as we've already stepped on
974 memory that isn't ours. But it's an okay debugging test. */
975 assert(p-buf < bufsize);
976 }
977 if (digits)
978 _Py_dg_freedtoa(digits);
979
980 return buf;
981}
982
983
984PyAPI_FUNC(char *) PyOS_double_to_string(double val,
Eric Smith193125a2009-04-16 22:08:31 +0000985 char format_code,
986 int precision,
987 int flags,
Eric Smith0923d1d2009-04-16 20:16:10 +0000988 int *type)
989{
Eric Smith193125a2009-04-16 22:08:31 +0000990 char **float_strings = lc_float_strings;
991 int mode;
Eric Smith0923d1d2009-04-16 20:16:10 +0000992
Eric Smith193125a2009-04-16 22:08:31 +0000993 /* Validate format_code, and map upper and lower case. Compute the
994 mode and make any adjustments as needed. */
Eric Smith0923d1d2009-04-16 20:16:10 +0000995 switch (format_code) {
Eric Smith193125a2009-04-16 22:08:31 +0000996 /* exponent */
Eric Smith0923d1d2009-04-16 20:16:10 +0000997 case 'E':
Eric Smith0923d1d2009-04-16 20:16:10 +0000998 float_strings = uc_float_strings;
Eric Smith193125a2009-04-16 22:08:31 +0000999 format_code = 'e';
1000 /* Fall through. */
Eric Smith0923d1d2009-04-16 20:16:10 +00001001 case 'e':
1002 mode = 2;
1003 precision++;
1004 break;
Eric Smith193125a2009-04-16 22:08:31 +00001005
1006 /* fixed */
1007 case 'F':
1008 float_strings = uc_float_strings;
1009 format_code = 'f';
1010 /* Fall through. */
Eric Smith0923d1d2009-04-16 20:16:10 +00001011 case 'f':
1012 mode = 3;
1013 break;
Eric Smith193125a2009-04-16 22:08:31 +00001014
1015 /* general */
1016 case 'G':
1017 float_strings = uc_float_strings;
1018 format_code = 'g';
1019 /* Fall through. */
Eric Smith0923d1d2009-04-16 20:16:10 +00001020 case 'g':
1021 mode = 2;
1022 /* precision 0 makes no sense for 'g' format; interpret as 1 */
1023 if (precision == 0)
1024 precision = 1;
1025 break;
Eric Smith193125a2009-04-16 22:08:31 +00001026
1027 /* repr format */
Eric Smith0923d1d2009-04-16 20:16:10 +00001028 case 'r':
Eric Smith0923d1d2009-04-16 20:16:10 +00001029 mode = 0;
1030 /* Supplied precision is unused, must be 0. */
1031 if (precision != 0) {
1032 PyErr_BadInternalCall();
1033 return NULL;
1034 }
1035 break;
Eric Smith193125a2009-04-16 22:08:31 +00001036
1037 /* str format */
Eric Smith0923d1d2009-04-16 20:16:10 +00001038 case 's':
1039 mode = 2;
1040 /* Supplied precision is unused, must be 0. */
1041 if (precision != 0) {
1042 PyErr_BadInternalCall();
1043 return NULL;
1044 }
1045 precision = 12;
1046 break;
Eric Smith193125a2009-04-16 22:08:31 +00001047
1048 default:
1049 PyErr_BadInternalCall();
1050 return NULL;
Eric Smith0923d1d2009-04-16 20:16:10 +00001051 }
1052
Eric Smith193125a2009-04-16 22:08:31 +00001053 return format_float_short(val, format_code, mode, precision,
Eric Smith0923d1d2009-04-16 20:16:10 +00001054 flags & Py_DTSF_SIGN,
1055 flags & Py_DTSF_ADD_DOT_0,
1056 flags & Py_DTSF_ALT,
1057 float_strings, type);
1058}
1059#endif /* ifdef PY_NO_SHORT_FLOAT_REPR */