blob: 872cc37ed7c502bb83cfffc0695ba58fb930dd1e [file] [log] [blame]
Martin v. Löwis737ea822004-06-08 18:52:54 +00001/* -*- Mode: C; c-file-style: "python" -*- */
2
3#include <Python.h>
4#include <locale.h>
5
/* ASCII character tests (as opposed to the locale-dependent <ctype.h>
   tests).  Both macros evaluate their argument more than once, so do not
   pass an expression with side effects. */
#define ISSPACE(c)  ((c) == ' ' || (c) == '\f' || (c) == '\n' || \
                     (c) == '\r' || (c) == '\t' || (c) == '\v')
#define ISDIGIT(c)  ((c) >= '0' && (c) <= '9')
Martin v. Löwis737ea822004-06-08 18:52:54 +000010
11
/**
 * PyOS_ascii_strtod:
 * @nptr:    the string to convert to a numeric value.
 * @endptr:  if non-%NULL, it returns the character after
 *           the last character used in the conversion.
 *
 * Converts a string to a double value.
 * This function behaves like the standard strtod() function
 * does in the C locale. It does this without actually
 * changing the current locale, since that would not be
 * thread-safe.
 *
 * This function is typically used when reading configuration
 * files or other non-user input that should be locale independent.
 * To handle input from the user you should normally use the
 * locale-sensitive system strtod() function.
 *
 * If the correct value would cause overflow, plus or minus %HUGE_VAL
 * is returned (according to the sign of the value), and %ERANGE is
 * stored in %errno. If the correct value would cause underflow,
 * zero is returned and %ERANGE is stored in %errno.
 * If memory allocation fails, %ENOMEM is stored in %errno.
 *
 * This function resets %errno before calling strtod() so that
 * you can reliably detect overflow and underflow.
 *
 * Return value: the double value.
 **/
Eric Smith0923d1d2009-04-16 20:16:10 +000040
41#ifndef PY_NO_SHORT_FLOAT_REPR
42
43double
44PyOS_ascii_strtod(const char *nptr, char **endptr)
45{
46 double result;
47 _Py_SET_53BIT_PRECISION_HEADER;
48
49 assert(nptr != NULL);
50 /* Set errno to zero, so that we can distinguish zero results
51 and underflows */
52 errno = 0;
53
54 _Py_SET_53BIT_PRECISION_START;
55 result = _Py_dg_strtod(nptr, endptr);
56 _Py_SET_53BIT_PRECISION_END;
57
58 return result;
59
60}
61
62#else
63
64/*
65 Use system strtod; since strtod is locale aware, we may
66 have to first fix the decimal separator.
67
68 Note that unlike _Py_dg_strtod, the system strtod may not always give
69 correctly rounded results.
70*/
71
Martin v. Löwis737ea822004-06-08 18:52:54 +000072double
Neal Norwitze7214a12005-12-18 05:03:17 +000073PyOS_ascii_strtod(const char *nptr, char **endptr)
Martin v. Löwis737ea822004-06-08 18:52:54 +000074{
75 char *fail_pos;
Neal Norwitz0e7a0ed2005-12-18 05:37:36 +000076 double val = -1.0;
Martin v. Löwis737ea822004-06-08 18:52:54 +000077 struct lconv *locale_data;
78 const char *decimal_point;
Neal Norwitzd39d8612006-01-08 01:03:36 +000079 size_t decimal_point_len;
Martin v. Löwis737ea822004-06-08 18:52:54 +000080 const char *p, *decimal_point_pos;
81 const char *end = NULL; /* Silence gcc */
Christian Heimesfaf2f632008-01-06 16:59:19 +000082 const char *digits_pos = NULL;
83 int negate = 0;
Martin v. Löwis737ea822004-06-08 18:52:54 +000084
Martin v. Löwis737ea822004-06-08 18:52:54 +000085 assert(nptr != NULL);
86
87 fail_pos = NULL;
88
89 locale_data = localeconv();
90 decimal_point = locale_data->decimal_point;
91 decimal_point_len = strlen(decimal_point);
92
93 assert(decimal_point_len != 0);
94
95 decimal_point_pos = NULL;
Christian Heimesfaf2f632008-01-06 16:59:19 +000096
Mark Dickinson6d65df12009-04-26 15:30:47 +000097 /* Set errno to zero, so that we can distinguish zero results
98 and underflows */
99 errno = 0;
100
Christian Heimesfaf2f632008-01-06 16:59:19 +0000101 /* We process any leading whitespace and the optional sign manually,
102 then pass the remainder to the system strtod. This ensures that
103 the result of an underflow has the correct sign. (bug #1725) */
104
105 p = nptr;
106 /* Skip leading space */
107 while (ISSPACE(*p))
108 p++;
109
110 /* Process leading sign, if present */
111 if (*p == '-') {
112 negate = 1;
113 p++;
Mark Dickinson6d65df12009-04-26 15:30:47 +0000114 }
115 else if (*p == '+') {
Christian Heimesfaf2f632008-01-06 16:59:19 +0000116 p++;
117 }
118
Mark Dickinson6d65df12009-04-26 15:30:47 +0000119 /* Parse infinities and nans */
120 if (*p == 'i' || *p == 'I') {
121 if (PyOS_strnicmp(p, "inf", 3) == 0) {
122 val = Py_HUGE_VAL;
123 if (PyOS_strnicmp(p+3, "inity", 5) == 0)
124 fail_pos = (char *)p+8;
125 else
126 fail_pos = (char *)p+3;
127 goto got_val;
128 }
129 else
130 goto invalid_string;
Christian Heimesfaf2f632008-01-06 16:59:19 +0000131 }
Mark Dickinson6d65df12009-04-26 15:30:47 +0000132#ifdef Py_NAN
133 if (*p == 'n' || *p == 'N') {
134 if (PyOS_strnicmp(p, "nan", 3) == 0) {
135 val = Py_NAN;
136 fail_pos = (char *)p+3;
137 goto got_val;
138 }
139 else
140 goto invalid_string;
141 }
142#endif
Christian Heimesfaf2f632008-01-06 16:59:19 +0000143
Mark Dickinson6d65df12009-04-26 15:30:47 +0000144 /* Some platform strtods accept hex floats; Python shouldn't (at the
145 moment), so we check explicitly for strings starting with '0x'. */
146 if (*p == '0' && (*(p+1) == 'x' || *(p+1) == 'X'))
147 goto invalid_string;
148
149 /* Check that what's left begins with a digit or decimal point */
150 if (!ISDIGIT(*p) && *p != '.')
151 goto invalid_string;
152
153 digits_pos = p;
154 if (decimal_point[0] != '.' ||
Martin v. Löwis737ea822004-06-08 18:52:54 +0000155 decimal_point[1] != 0)
156 {
Mark Dickinson6d65df12009-04-26 15:30:47 +0000157 /* Look for a '.' in the input; if present, it'll need to be
158 swapped for the current locale's decimal point before we
159 call strtod. On the other hand, if we find the current
160 locale's decimal point then the input is invalid. */
Neal Norwitze7214a12005-12-18 05:03:17 +0000161 while (ISDIGIT(*p))
162 p++;
163
164 if (*p == '.')
Martin v. Löwis737ea822004-06-08 18:52:54 +0000165 {
Neal Norwitze7214a12005-12-18 05:03:17 +0000166 decimal_point_pos = p++;
Martin v. Löwis737ea822004-06-08 18:52:54 +0000167
Mark Dickinson6d65df12009-04-26 15:30:47 +0000168 /* locate end of number */
Martin v. Löwis737ea822004-06-08 18:52:54 +0000169 while (ISDIGIT(*p))
170 p++;
171
Neal Norwitze7214a12005-12-18 05:03:17 +0000172 if (*p == 'e' || *p == 'E')
173 p++;
174 if (*p == '+' || *p == '-')
175 p++;
176 while (ISDIGIT(*p))
177 p++;
178 end = p;
Martin v. Löwis737ea822004-06-08 18:52:54 +0000179 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000180 else if (strncmp(p, decimal_point, decimal_point_len) == 0)
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000181 /* Python bug #1417699 */
Mark Dickinson6d65df12009-04-26 15:30:47 +0000182 goto invalid_string;
Christian Heimesb186d002008-03-18 15:15:01 +0000183 /* For the other cases, we need not convert the decimal
184 point */
Martin v. Löwis737ea822004-06-08 18:52:54 +0000185 }
186
Mark Dickinson6d65df12009-04-26 15:30:47 +0000187 if (decimal_point_pos) {
Martin v. Löwis737ea822004-06-08 18:52:54 +0000188 char *copy, *c;
Mark Dickinson6d65df12009-04-26 15:30:47 +0000189 /* Create a copy of the input, with the '.' converted to the
190 locale-specific decimal point */
Christian Heimesfaf2f632008-01-06 16:59:19 +0000191 copy = (char *)PyMem_MALLOC(end - digits_pos +
192 1 + decimal_point_len);
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000193 if (copy == NULL) {
194 if (endptr)
195 *endptr = (char *)nptr;
196 errno = ENOMEM;
197 return val;
198 }
Martin v. Löwis737ea822004-06-08 18:52:54 +0000199
200 c = copy;
Christian Heimesfaf2f632008-01-06 16:59:19 +0000201 memcpy(c, digits_pos, decimal_point_pos - digits_pos);
202 c += decimal_point_pos - digits_pos;
Martin v. Löwis737ea822004-06-08 18:52:54 +0000203 memcpy(c, decimal_point, decimal_point_len);
204 c += decimal_point_len;
Christian Heimesb186d002008-03-18 15:15:01 +0000205 memcpy(c, decimal_point_pos + 1,
206 end - (decimal_point_pos + 1));
Martin v. Löwis737ea822004-06-08 18:52:54 +0000207 c += end - (decimal_point_pos + 1);
208 *c = 0;
209
210 val = strtod(copy, &fail_pos);
211
212 if (fail_pos)
213 {
214 if (fail_pos > decimal_point_pos)
Christian Heimesfaf2f632008-01-06 16:59:19 +0000215 fail_pos = (char *)digits_pos +
216 (fail_pos - copy) -
217 (decimal_point_len - 1);
Martin v. Löwis737ea822004-06-08 18:52:54 +0000218 else
Christian Heimesfaf2f632008-01-06 16:59:19 +0000219 fail_pos = (char *)digits_pos +
220 (fail_pos - copy);
Martin v. Löwis737ea822004-06-08 18:52:54 +0000221 }
222
Thomas Wouters477c8d52006-05-27 19:21:47 +0000223 PyMem_FREE(copy);
Martin v. Löwis737ea822004-06-08 18:52:54 +0000224
225 }
Neal Norwitze7214a12005-12-18 05:03:17 +0000226 else {
Christian Heimesfaf2f632008-01-06 16:59:19 +0000227 val = strtod(digits_pos, &fail_pos);
Neal Norwitze7214a12005-12-18 05:03:17 +0000228 }
Martin v. Löwis737ea822004-06-08 18:52:54 +0000229
Christian Heimesfaf2f632008-01-06 16:59:19 +0000230 if (fail_pos == digits_pos)
Mark Dickinson6d65df12009-04-26 15:30:47 +0000231 goto invalid_string;
Christian Heimesfaf2f632008-01-06 16:59:19 +0000232
Mark Dickinson6d65df12009-04-26 15:30:47 +0000233 got_val:
Christian Heimesfaf2f632008-01-06 16:59:19 +0000234 if (negate && fail_pos != nptr)
235 val = -val;
236
Martin v. Löwis737ea822004-06-08 18:52:54 +0000237 if (endptr)
238 *endptr = fail_pos;
239
240 return val;
Mark Dickinson6d65df12009-04-26 15:30:47 +0000241
242 invalid_string:
243 if (endptr)
244 *endptr = (char*)nptr;
245 errno = EINVAL;
246 return -1.0;
Martin v. Löwis737ea822004-06-08 18:52:54 +0000247}
248
Eric Smith0923d1d2009-04-16 20:16:10 +0000249#endif
250
/* Convert nptr to a double via PyOS_ascii_strtod, discarding the
   end-of-parse position (NULL is passed for endptr). */
double
PyOS_ascii_atof(const char *nptr)
{
    return PyOS_ascii_strtod(nptr, NULL);
}
256
257
Eric Smithb2c7af82008-04-30 02:12:09 +0000258/* Given a string that may have a decimal point in the current
259 locale, change it back to a dot. Since the string cannot get
260 longer, no need for a maximum buffer size parameter. */
261Py_LOCAL_INLINE(void)
262change_decimal_from_locale_to_dot(char* buffer)
263{
264 struct lconv *locale_data = localeconv();
265 const char *decimal_point = locale_data->decimal_point;
266
267 if (decimal_point[0] != '.' || decimal_point[1] != 0) {
268 size_t decimal_point_len = strlen(decimal_point);
269
270 if (*buffer == '+' || *buffer == '-')
271 buffer++;
272 while (isdigit(Py_CHARMASK(*buffer)))
273 buffer++;
274 if (strncmp(buffer, decimal_point, decimal_point_len) == 0) {
275 *buffer = '.';
276 buffer++;
277 if (decimal_point_len > 1) {
278 /* buffer needs to get smaller */
279 size_t rest_len = strlen(buffer +
280 (decimal_point_len - 1));
281 memmove(buffer,
282 buffer + (decimal_point_len - 1),
283 rest_len);
284 buffer[rest_len] = 0;
285 }
286 }
287 }
288}
289
Martin v. Löwis737ea822004-06-08 18:52:54 +0000290
Christian Heimesc3f30c42008-02-22 16:37:40 +0000291/* From the C99 standard, section 7.19.6:
292The exponent always contains at least two digits, and only as many more digits
293as necessary to represent the exponent.
294*/
295#define MIN_EXPONENT_DIGITS 2
296
Eric Smithb2c7af82008-04-30 02:12:09 +0000297/* Ensure that any exponent, if present, is at least MIN_EXPONENT_DIGITS
298 in length. */
299Py_LOCAL_INLINE(void)
Mark Dickinsonce95e562009-04-26 20:02:24 +0000300ensure_minimum_exponent_length(char* buffer, size_t buf_size)
Eric Smithb2c7af82008-04-30 02:12:09 +0000301{
302 char *p = strpbrk(buffer, "eE");
303 if (p && (*(p + 1) == '-' || *(p + 1) == '+')) {
304 char *start = p + 2;
305 int exponent_digit_cnt = 0;
306 int leading_zero_cnt = 0;
307 int in_leading_zeros = 1;
308 int significant_digit_cnt;
309
310 /* Skip over the exponent and the sign. */
311 p += 2;
312
313 /* Find the end of the exponent, keeping track of leading
314 zeros. */
315 while (*p && isdigit(Py_CHARMASK(*p))) {
316 if (in_leading_zeros && *p == '0')
317 ++leading_zero_cnt;
318 if (*p != '0')
319 in_leading_zeros = 0;
320 ++p;
321 ++exponent_digit_cnt;
322 }
323
324 significant_digit_cnt = exponent_digit_cnt - leading_zero_cnt;
325 if (exponent_digit_cnt == MIN_EXPONENT_DIGITS) {
326 /* If there are 2 exactly digits, we're done,
327 regardless of what they contain */
328 }
329 else if (exponent_digit_cnt > MIN_EXPONENT_DIGITS) {
330 int extra_zeros_cnt;
331
332 /* There are more than 2 digits in the exponent. See
333 if we can delete some of the leading zeros */
334 if (significant_digit_cnt < MIN_EXPONENT_DIGITS)
335 significant_digit_cnt = MIN_EXPONENT_DIGITS;
336 extra_zeros_cnt = exponent_digit_cnt -
337 significant_digit_cnt;
338
339 /* Delete extra_zeros_cnt worth of characters from the
340 front of the exponent */
341 assert(extra_zeros_cnt >= 0);
342
343 /* Add one to significant_digit_cnt to copy the
344 trailing 0 byte, thus setting the length */
345 memmove(start,
346 start + extra_zeros_cnt,
347 significant_digit_cnt + 1);
348 }
349 else {
350 /* If there are fewer than 2 digits, add zeros
351 until there are 2, if there's enough room */
352 int zeros = MIN_EXPONENT_DIGITS - exponent_digit_cnt;
353 if (start + zeros + exponent_digit_cnt + 1
354 < buffer + buf_size) {
355 memmove(start + zeros, start,
356 exponent_digit_cnt + 1);
357 memset(start, '0', zeros);
358 }
359 }
360 }
361}
362
Eric Smith0923d1d2009-04-16 20:16:10 +0000363/* Ensure that buffer has a decimal point in it. The decimal point will not
364 be in the current locale, it will always be '.'. Don't add a decimal if an
365 exponent is present. */
Eric Smithb2c7af82008-04-30 02:12:09 +0000366Py_LOCAL_INLINE(void)
367ensure_decimal_point(char* buffer, size_t buf_size)
368{
369 int insert_count = 0;
370 char* chars_to_insert;
371
372 /* search for the first non-digit character */
373 char *p = buffer;
Eric Smith2ad79e82008-07-19 00:33:23 +0000374 if (*p == '-' || *p == '+')
375 /* Skip leading sign, if present. I think this could only
376 ever be '-', but it can't hurt to check for both. */
377 ++p;
Eric Smithb2c7af82008-04-30 02:12:09 +0000378 while (*p && isdigit(Py_CHARMASK(*p)))
379 ++p;
380
381 if (*p == '.') {
382 if (isdigit(Py_CHARMASK(*(p+1)))) {
383 /* Nothing to do, we already have a decimal
384 point and a digit after it */
385 }
386 else {
387 /* We have a decimal point, but no following
388 digit. Insert a zero after the decimal. */
389 ++p;
390 chars_to_insert = "0";
391 insert_count = 1;
392 }
393 }
Eric Smith0923d1d2009-04-16 20:16:10 +0000394 else if (!(*p == 'e' || *p == 'E')) {
395 /* Don't add ".0" if we have an exponent. */
Eric Smithb2c7af82008-04-30 02:12:09 +0000396 chars_to_insert = ".0";
397 insert_count = 2;
398 }
399 if (insert_count) {
400 size_t buf_len = strlen(buffer);
401 if (buf_len + insert_count + 1 >= buf_size) {
402 /* If there is not enough room in the buffer
403 for the additional text, just skip it. It's
404 not worth generating an error over. */
405 }
406 else {
407 memmove(p + insert_count, p,
408 buffer + strlen(buffer) - p + 1);
409 memcpy(p, chars_to_insert, insert_count);
410 }
411 }
412}
413
/* see FORMATBUFLEN in unicodeobject.c */
#define FLOAT_FORMATBUFLEN 120

/**
 * _PyOS_ascii_formatd:
 * @buffer: A buffer to place the resulting string in
 * @buf_size: The length of the buffer.
 * @format: The printf()-style format to use for the
 *          code to use for converting.
 * @d: The double to convert
 *
 * Converts a double to a string, using the '.' as
 * decimal point. To format the number you pass in
 * a printf()-style format string. Allowed conversion
 * specifiers are 'e', 'E', 'f', 'F', 'g', 'G', and 'Z'.
 *
 * 'Z' is the same as 'g', except it always has a decimal and
 *     at least one digit after the decimal.
 *
 * Return value: The pointer to the buffer with the converted string,
 * or NULL if the format string is rejected (it does not start with '%',
 * contains a single quote, 'l' or a second '%', does not end in one of
 * the allowed conversion specifiers, or is a 'Z' format too long to be
 * copied).
 **/
char *
_PyOS_ascii_formatd(char       *buffer,
                    size_t      buf_size,
                    const char *format,
                    double      d)
{
    char format_char;
    size_t format_len = strlen(format);

    /* Issue 2264: code 'Z' requires copying the format.  'Z' is 'g', but
       also with at least one character past the decimal. */
    char tmp_format[FLOAT_FORMATBUFLEN];

    /* Check the leading '%' before anything else.  This also rejects the
       empty string, for which format[format_len - 1] below would read
       before the start of the string. */
    if (format[0] != '%')
        return NULL;

    /* The last character in the format string must be the format char */
    format_char = format[format_len - 1];

    /* I'm not sure why this test is here.  It's ensuring that the format
       string after the first character doesn't have a single quote, a
       lowercase l, or a percent. */
    if (strpbrk(format + 1, "'l%"))
        return NULL;

    /* Also curious about this function is that it accepts format strings
       like "%xg", which are invalid for floats.  In general, the
       interface to this function is not very good, but changing it is
       difficult because it's a public API. */

    if (!(format_char == 'e' || format_char == 'E' ||
          format_char == 'f' || format_char == 'F' ||
          format_char == 'g' || format_char == 'G' ||
          format_char == 'Z'))
        return NULL;

    /* Map 'Z' format_char to 'g', by copying the format string and
       replacing the final char with a 'g' */
    if (format_char == 'Z') {
        if (format_len + 1 >= sizeof(tmp_format)) {
            /* The format won't fit in our copy.  Error out.  In
               practice, this will never happen and will be
               detected by returning NULL */
            return NULL;
        }
        strcpy(tmp_format, format);
        tmp_format[format_len - 1] = 'g';
        format = tmp_format;
    }


    /* Have PyOS_snprintf do the hard work */
    PyOS_snprintf(buffer, buf_size, format, d);

    /* Do various fixups on the return string */

    /* Get the current locale, and find the decimal point string.
       Convert that string back to a dot. */
    change_decimal_from_locale_to_dot(buffer);

    /* If an exponent exists, ensure that the exponent is at least
       MIN_EXPONENT_DIGITS digits, providing the buffer is large enough
       for the extra zeros.  Also, if there are more than
       MIN_EXPONENT_DIGITS, remove as many zeros as possible until we get
       back to MIN_EXPONENT_DIGITS */
    ensure_minimum_exponent_length(buffer, buf_size);

    /* If format_char is 'Z', make sure we have at least one character
       after the decimal point (and make sure we have a decimal point). */
    if (format_char == 'Z')
        ensure_decimal_point(buffer, buf_size);

    return buffer;
}
510
Eric Smithcc32a112009-04-26 21:35:14 +0000511char *
512PyOS_ascii_formatd(char *buffer,
513 size_t buf_size,
514 const char *format,
515 double d)
516{
517 if (PyErr_WarnEx(PyExc_DeprecationWarning,
518 "PyOS_ascii_formatd is deprecated, "
519 "use PyOS_double_to_string instead", 1) < 0)
520 return NULL;
521
522 return _PyOS_ascii_formatd(buffer, buf_size, format, d);
523}
524
Eric Smith0923d1d2009-04-16 20:16:10 +0000525#ifdef PY_NO_SHORT_FLOAT_REPR
526
527/* The fallback code to use if _Py_dg_dtoa is not available. */
528
Mark Dickinson3370cce2009-04-17 22:40:53 +0000529/* Remove trailing zeros after the decimal point from a numeric string; also
530 remove the decimal point if all digits following it are zero. The numeric
531 string must end in '\0', and should not have any leading or trailing
532 whitespace. Assumes that the decimal point is '.'. */
533Py_LOCAL_INLINE(void)
534remove_trailing_zeros(char *buffer)
535{
536 char *old_fraction_end, *new_fraction_end, *end, *p;
537
538 p = buffer;
539 if (*p == '-' || *p == '+')
540 /* Skip leading sign, if present */
541 ++p;
542 while (isdigit(Py_CHARMASK(*p)))
543 ++p;
544
545 /* if there's no decimal point there's nothing to do */
546 if (*p++ != '.')
547 return;
548
549 /* scan any digits after the point */
550 while (isdigit(Py_CHARMASK(*p)))
551 ++p;
552 old_fraction_end = p;
553
554 /* scan up to ending '\0' */
555 while (*p != '\0')
556 p++;
557 /* +1 to make sure that we move the null byte as well */
558 end = p+1;
559
560 /* scan back from fraction_end, looking for removable zeros */
561 p = old_fraction_end;
562 while (*(p-1) == '0')
563 --p;
564 /* and remove point if we've got that far */
565 if (*(p-1) == '.')
566 --p;
567 new_fraction_end = p;
568
569 memmove(new_fraction_end, old_fraction_end, end-old_fraction_end);
570}
571
572
Eric Smith0923d1d2009-04-16 20:16:10 +0000573PyAPI_FUNC(char *) PyOS_double_to_string(double val,
574 char format_code,
575 int precision,
576 int flags,
577 int *type)
Martin v. Löwis737ea822004-06-08 18:52:54 +0000578{
Eric Smith0923d1d2009-04-16 20:16:10 +0000579 char buf[128];
580 char format[32];
581 Py_ssize_t len;
582 char *result;
583 char *p;
584 int t;
585 int upper = 0;
Mark Dickinson3370cce2009-04-17 22:40:53 +0000586 int strip_trailing_zeros = 0;
Eric Smith0923d1d2009-04-16 20:16:10 +0000587
588 /* Validate format_code, and map upper and lower case */
589 switch (format_code) {
590 case 'e': /* exponent */
591 case 'f': /* fixed */
592 case 'g': /* general */
593 break;
594 case 'E':
595 upper = 1;
596 format_code = 'e';
597 break;
598 case 'F':
599 upper = 1;
600 format_code = 'f';
601 break;
602 case 'G':
603 upper = 1;
604 format_code = 'g';
605 break;
606 case 'r': /* repr format */
607 /* Supplied precision is unused, must be 0. */
608 if (precision != 0) {
609 PyErr_BadInternalCall();
610 return NULL;
611 }
612 precision = 17;
613 format_code = 'g';
614 break;
615 case 's': /* str format */
616 /* Supplied precision is unused, must be 0. */
617 if (precision != 0) {
618 PyErr_BadInternalCall();
619 return NULL;
620 }
Mark Dickinson3370cce2009-04-17 22:40:53 +0000621 /* switch to exponential notation at 1e11, or 1e12 if we're
622 not adding a .0 */
623 if (fabs(val) >= (flags & Py_DTSF_ADD_DOT_0 ? 1e11 : 1e12)) {
624 precision = 11;
625 format_code = 'e';
626 strip_trailing_zeros = 1;
627 }
628 else {
629 precision = 12;
630 format_code = 'g';
631 }
Eric Smith0923d1d2009-04-16 20:16:10 +0000632 break;
633 default:
634 PyErr_BadInternalCall();
635 return NULL;
636 }
637
638 /* Handle nan and inf. */
639 if (Py_IS_NAN(val)) {
640 strcpy(buf, "nan");
641 t = Py_DTST_NAN;
642 } else if (Py_IS_INFINITY(val)) {
643 if (copysign(1., val) == 1.)
644 strcpy(buf, "inf");
645 else
646 strcpy(buf, "-inf");
647 t = Py_DTST_INFINITE;
648 } else {
649 t = Py_DTST_FINITE;
650
651
Mark Dickinson3370cce2009-04-17 22:40:53 +0000652 if ((flags & Py_DTSF_ADD_DOT_0) && (format_code != 'e'))
Eric Smith0923d1d2009-04-16 20:16:10 +0000653 format_code = 'Z';
654
Eric Smithcc32a112009-04-26 21:35:14 +0000655 PyOS_snprintf(format, sizeof(format), "%%%s.%i%c",
656 (flags & Py_DTSF_ALT ? "#" : ""), precision,
657 format_code);
658 _PyOS_ascii_formatd(buf, sizeof(buf), format, val);
Mark Dickinson3370cce2009-04-17 22:40:53 +0000659 /* remove trailing zeros if necessary */
660 if (strip_trailing_zeros)
661 remove_trailing_zeros(buf);
Eric Smith0923d1d2009-04-16 20:16:10 +0000662 }
663
664 len = strlen(buf);
665
666 /* Add 1 for the trailing 0 byte.
667 Add 1 because we might need to make room for the sign.
668 */
669 result = PyMem_Malloc(len + 2);
670 if (result == NULL) {
671 PyErr_NoMemory();
672 return NULL;
673 }
674 p = result;
675
Mark Dickinsonad476da2009-04-23 19:14:16 +0000676 /* Add sign when requested. It's convenient (esp. when formatting
677 complex numbers) to include a sign even for inf and nan. */
678 if (flags & Py_DTSF_SIGN && buf[0] != '-')
Eric Smith0923d1d2009-04-16 20:16:10 +0000679 *p++ = '+';
680
681 strcpy(p, buf);
682
683 if (upper) {
684 /* Convert to upper case. */
685 char *p1;
686 for (p1 = p; *p1; p1++)
687 *p1 = toupper(*p1);
688 }
689
690 if (type)
691 *type = t;
692 return result;
Martin v. Löwis737ea822004-06-08 18:52:54 +0000693}
Eric Smith0923d1d2009-04-16 20:16:10 +0000694
695#else
696
697/* _Py_dg_dtoa is available. */
698
/* I'm using a lookup table here so that I don't have to invent a non-locale
   specific way to convert to uppercase */
#define OFS_INF 0
#define OFS_NAN 1
#define OFS_E 2

/* The lengths of these are known to the code below, so don't change them.
   Index with the OFS_* constants above; the two tables hold the same
   strings in lower and upper case respectively. */
static char *lc_float_strings[] = {
    "inf",
    "nan",
    "e",
};
static char *uc_float_strings[] = {
    "INF",
    "NAN",
    "E",
};
716
717
718/* Convert a double d to a string, and return a PyMem_Malloc'd block of
   memory containing the resulting string.
720
721 Arguments:
722 d is the double to be converted
723 format_code is one of 'e', 'f', 'g', 'r' or 's'. 'e', 'f' and 'g'
724 correspond to '%e', '%f' and '%g'; 'r' and 's' correspond
725 to repr and str.
726 mode is one of '0', '2' or '3', and is completely determined by
727 format_code: 'e', 'g' and 's' use mode 2; 'f' mode 3, 'r' mode 0.
728 precision is the desired precision
729 always_add_sign is nonzero if a '+' sign should be included for positive
730 numbers
731 add_dot_0_if_integer is nonzero if integers in non-exponential form
732 should have ".0" added. Only applies to format codes 'r', 's', and 'g'.
733 use_alt_formatting is nonzero if alternative formatting should be
734 used. Only applies to format codes 'e', 'f' and 'g'.
735 type, if non-NULL, will be set to one of these constants to identify
736 the type of the 'd' argument:
737 Py_DTST_FINITE
738 Py_DTST_INFINITE
739 Py_DTST_NAN
740
741 Returns a PyMem_Malloc'd block of memory containing the resulting string,
742 or NULL on error. If NULL is returned, the Python error has been set.
743 */
744
745static char *
746format_float_short(double d, char format_code,
747 int mode, Py_ssize_t precision,
748 int always_add_sign, int add_dot_0_if_integer,
749 int use_alt_formatting, char **float_strings, int *type)
750{
751 char *buf = NULL;
752 char *p = NULL;
753 Py_ssize_t bufsize = 0;
754 char *digits, *digits_end;
755 int decpt_as_int, sign, exp_len, exp = 0, use_exp = 0;
756 Py_ssize_t decpt, digits_len, vdigits_start, vdigits_end;
757 _Py_SET_53BIT_PRECISION_HEADER;
758
759 /* _Py_dg_dtoa returns a digit string (no decimal point or exponent).
760 Must be matched by a call to _Py_dg_freedtoa. */
761 _Py_SET_53BIT_PRECISION_START;
762 digits = _Py_dg_dtoa(d, mode, precision, &decpt_as_int, &sign,
763 &digits_end);
764 _Py_SET_53BIT_PRECISION_END;
765
766 decpt = (Py_ssize_t)decpt_as_int;
767 if (digits == NULL) {
768 /* The only failure mode is no memory. */
769 PyErr_NoMemory();
770 goto exit;
771 }
772 assert(digits_end != NULL && digits_end >= digits);
773 digits_len = digits_end - digits;
774
Mark Dickinson3370cce2009-04-17 22:40:53 +0000775 if (digits_len && !isdigit(Py_CHARMASK(digits[0]))) {
Eric Smith0923d1d2009-04-16 20:16:10 +0000776 /* Infinities and nans here; adapt Gay's output,
777 so convert Infinity to inf and NaN to nan, and
778 ignore sign of nan. Then return. */
779
Mark Dickinsonad476da2009-04-23 19:14:16 +0000780 /* ignore the actual sign of a nan */
781 if (digits[0] == 'n' || digits[0] == 'N')
782 sign = 0;
783
Eric Smith0923d1d2009-04-16 20:16:10 +0000784 /* We only need 5 bytes to hold the result "+inf\0" . */
785 bufsize = 5; /* Used later in an assert. */
786 buf = (char *)PyMem_Malloc(bufsize);
787 if (buf == NULL) {
788 PyErr_NoMemory();
789 goto exit;
790 }
791 p = buf;
792
Mark Dickinsonad476da2009-04-23 19:14:16 +0000793 if (sign == 1) {
794 *p++ = '-';
795 }
796 else if (always_add_sign) {
797 *p++ = '+';
798 }
Eric Smith0923d1d2009-04-16 20:16:10 +0000799 if (digits[0] == 'i' || digits[0] == 'I') {
Eric Smith0923d1d2009-04-16 20:16:10 +0000800 strncpy(p, float_strings[OFS_INF], 3);
801 p += 3;
802
803 if (type)
804 *type = Py_DTST_INFINITE;
805 }
806 else if (digits[0] == 'n' || digits[0] == 'N') {
Eric Smith0923d1d2009-04-16 20:16:10 +0000807 strncpy(p, float_strings[OFS_NAN], 3);
808 p += 3;
809
810 if (type)
811 *type = Py_DTST_NAN;
812 }
813 else {
814 /* shouldn't get here: Gay's code should always return
815 something starting with a digit, an 'I', or 'N' */
816 strncpy(p, "ERR", 3);
817 p += 3;
818 assert(0);
819 }
820 goto exit;
821 }
822
823 /* The result must be finite (not inf or nan). */
824 if (type)
825 *type = Py_DTST_FINITE;
826
827
828 /* We got digits back, format them. We may need to pad 'digits'
829 either on the left or right (or both) with extra zeros, so in
830 general the resulting string has the form
831
832 [<sign>]<zeros><digits><zeros>[<exponent>]
833
834 where either of the <zeros> pieces could be empty, and there's a
835 decimal point that could appear either in <digits> or in the
836 leading or trailing <zeros>.
837
838 Imagine an infinite 'virtual' string vdigits, consisting of the
839 string 'digits' (starting at index 0) padded on both the left and
840 right with infinite strings of zeros. We want to output a slice
841
842 vdigits[vdigits_start : vdigits_end]
843
844 of this virtual string. Thus if vdigits_start < 0 then we'll end
845 up producing some leading zeros; if vdigits_end > digits_len there
846 will be trailing zeros in the output. The next section of code
847 determines whether to use an exponent or not, figures out the
848 position 'decpt' of the decimal point, and computes 'vdigits_start'
849 and 'vdigits_end'. */
850 vdigits_end = digits_len;
851 switch (format_code) {
852 case 'e':
853 use_exp = 1;
854 vdigits_end = precision;
855 break;
856 case 'f':
857 vdigits_end = decpt + precision;
858 break;
859 case 'g':
860 if (decpt <= -4 || decpt > precision)
861 use_exp = 1;
862 if (use_alt_formatting)
863 vdigits_end = precision;
864 break;
865 case 'r':
866 /* convert to exponential format at 1e16. We used to convert
867 at 1e17, but that gives odd-looking results for some values
868 when a 16-digit 'shortest' repr is padded with bogus zeros.
869 For example, repr(2e16+8) would give 20000000000000010.0;
870 the true value is 20000000000000008.0. */
871 if (decpt <= -4 || decpt > 16)
872 use_exp = 1;
873 break;
874 case 's':
875 /* if we're forcing a digit after the point, convert to
876 exponential format at 1e11. If not, convert at 1e12. */
877 if (decpt <= -4 || decpt >
878 (add_dot_0_if_integer ? precision-1 : precision))
879 use_exp = 1;
880 break;
881 default:
882 PyErr_BadInternalCall();
883 goto exit;
884 }
885
886 /* if using an exponent, reset decimal point position to 1 and adjust
887 exponent accordingly.*/
888 if (use_exp) {
889 exp = decpt - 1;
890 decpt = 1;
891 }
892 /* ensure vdigits_start < decpt <= vdigits_end, or vdigits_start <
893 decpt < vdigits_end if add_dot_0_if_integer and no exponent */
894 vdigits_start = decpt <= 0 ? decpt-1 : 0;
895 if (!use_exp && add_dot_0_if_integer)
896 vdigits_end = vdigits_end > decpt ? vdigits_end : decpt + 1;
897 else
898 vdigits_end = vdigits_end > decpt ? vdigits_end : decpt;
899
900 /* double check inequalities */
901 assert(vdigits_start <= 0 &&
902 0 <= digits_len &&
903 digits_len <= vdigits_end);
904 /* decimal point should be in (vdigits_start, vdigits_end] */
905 assert(vdigits_start < decpt && decpt <= vdigits_end);
906
907 /* Compute an upper bound how much memory we need. This might be a few
908 chars too long, but no big deal. */
909 bufsize =
910 /* sign, decimal point and trailing 0 byte */
911 3 +
912
913 /* total digit count (including zero padding on both sides) */
914 (vdigits_end - vdigits_start) +
915
916 /* exponent "e+100", max 3 numerical digits */
917 (use_exp ? 5 : 0);
918
919 /* Now allocate the memory and initialize p to point to the start of
920 it. */
921 buf = (char *)PyMem_Malloc(bufsize);
922 if (buf == NULL) {
923 PyErr_NoMemory();
924 goto exit;
925 }
926 p = buf;
927
928 /* Add a negative sign if negative, and a plus sign if non-negative
929 and always_add_sign is true. */
930 if (sign == 1)
931 *p++ = '-';
932 else if (always_add_sign)
933 *p++ = '+';
934
935 /* note that exactly one of the three 'if' conditions is true,
936 so we include exactly one decimal point */
937 /* Zero padding on left of digit string */
938 if (decpt <= 0) {
939 memset(p, '0', decpt-vdigits_start);
940 p += decpt - vdigits_start;
941 *p++ = '.';
942 memset(p, '0', 0-decpt);
943 p += 0-decpt;
944 }
945 else {
946 memset(p, '0', 0-vdigits_start);
947 p += 0 - vdigits_start;
948 }
949
950 /* Digits, with included decimal point */
951 if (0 < decpt && decpt <= digits_len) {
952 strncpy(p, digits, decpt-0);
953 p += decpt-0;
954 *p++ = '.';
955 strncpy(p, digits+decpt, digits_len-decpt);
956 p += digits_len-decpt;
957 }
958 else {
959 strncpy(p, digits, digits_len);
960 p += digits_len;
961 }
962
963 /* And zeros on the right */
964 if (digits_len < decpt) {
965 memset(p, '0', decpt-digits_len);
966 p += decpt-digits_len;
967 *p++ = '.';
968 memset(p, '0', vdigits_end-decpt);
969 p += vdigits_end-decpt;
970 }
971 else {
972 memset(p, '0', vdigits_end-digits_len);
973 p += vdigits_end-digits_len;
974 }
975
976 /* Delete a trailing decimal pt unless using alternative formatting. */
977 if (p[-1] == '.' && !use_alt_formatting)
978 p--;
979
980 /* Now that we've done zero padding, add an exponent if needed. */
981 if (use_exp) {
982 *p++ = float_strings[OFS_E][0];
983 exp_len = sprintf(p, "%+.02d", exp);
984 p += exp_len;
985 }
986 exit:
987 if (buf) {
988 *p = '\0';
989 /* It's too late if this fails, as we've already stepped on
990 memory that isn't ours. But it's an okay debugging test. */
991 assert(p-buf < bufsize);
992 }
993 if (digits)
994 _Py_dg_freedtoa(digits);
995
996 return buf;
997}
998
999
1000PyAPI_FUNC(char *) PyOS_double_to_string(double val,
Eric Smith193125a2009-04-16 22:08:31 +00001001 char format_code,
1002 int precision,
1003 int flags,
Eric Smith0923d1d2009-04-16 20:16:10 +00001004 int *type)
1005{
Eric Smith193125a2009-04-16 22:08:31 +00001006 char **float_strings = lc_float_strings;
1007 int mode;
Eric Smith0923d1d2009-04-16 20:16:10 +00001008
Eric Smith193125a2009-04-16 22:08:31 +00001009 /* Validate format_code, and map upper and lower case. Compute the
1010 mode and make any adjustments as needed. */
Eric Smith0923d1d2009-04-16 20:16:10 +00001011 switch (format_code) {
Eric Smith193125a2009-04-16 22:08:31 +00001012 /* exponent */
Eric Smith0923d1d2009-04-16 20:16:10 +00001013 case 'E':
Eric Smith0923d1d2009-04-16 20:16:10 +00001014 float_strings = uc_float_strings;
Eric Smith193125a2009-04-16 22:08:31 +00001015 format_code = 'e';
1016 /* Fall through. */
Eric Smith0923d1d2009-04-16 20:16:10 +00001017 case 'e':
1018 mode = 2;
1019 precision++;
1020 break;
Eric Smith193125a2009-04-16 22:08:31 +00001021
1022 /* fixed */
1023 case 'F':
1024 float_strings = uc_float_strings;
1025 format_code = 'f';
1026 /* Fall through. */
Eric Smith0923d1d2009-04-16 20:16:10 +00001027 case 'f':
1028 mode = 3;
1029 break;
Eric Smith193125a2009-04-16 22:08:31 +00001030
1031 /* general */
1032 case 'G':
1033 float_strings = uc_float_strings;
1034 format_code = 'g';
1035 /* Fall through. */
Eric Smith0923d1d2009-04-16 20:16:10 +00001036 case 'g':
1037 mode = 2;
1038 /* precision 0 makes no sense for 'g' format; interpret as 1 */
1039 if (precision == 0)
1040 precision = 1;
1041 break;
Eric Smith193125a2009-04-16 22:08:31 +00001042
1043 /* repr format */
Eric Smith0923d1d2009-04-16 20:16:10 +00001044 case 'r':
Eric Smith0923d1d2009-04-16 20:16:10 +00001045 mode = 0;
1046 /* Supplied precision is unused, must be 0. */
1047 if (precision != 0) {
1048 PyErr_BadInternalCall();
1049 return NULL;
1050 }
1051 break;
Eric Smith193125a2009-04-16 22:08:31 +00001052
1053 /* str format */
Eric Smith0923d1d2009-04-16 20:16:10 +00001054 case 's':
1055 mode = 2;
1056 /* Supplied precision is unused, must be 0. */
1057 if (precision != 0) {
1058 PyErr_BadInternalCall();
1059 return NULL;
1060 }
1061 precision = 12;
1062 break;
Eric Smith193125a2009-04-16 22:08:31 +00001063
1064 default:
1065 PyErr_BadInternalCall();
1066 return NULL;
Eric Smith0923d1d2009-04-16 20:16:10 +00001067 }
1068
Eric Smith193125a2009-04-16 22:08:31 +00001069 return format_float_short(val, format_code, mode, precision,
Eric Smith0923d1d2009-04-16 20:16:10 +00001070 flags & Py_DTSF_SIGN,
1071 flags & Py_DTSF_ADD_DOT_0,
1072 flags & Py_DTSF_ALT,
1073 float_strings, type);
1074}
1075#endif /* ifdef PY_NO_SHORT_FLOAT_REPR */