blob: 002714f7c2917e8507d8b6f2a33fc73e6e1d5166 [file] [log] [blame]
Martin v. Löwis737ea822004-06-08 18:52:54 +00001/* -*- Mode: C; c-file-style: "python" -*- */
2
3#include <Python.h>
4#include <locale.h>
5
6/* ascii character tests (as opposed to locale tests) */
/* ISSPACE(c): true for the six ASCII whitespace characters only.
   ISDIGIT(c): true for the ASCII decimal digits '0'..'9' only.
   Both macros evaluate their argument more than once, so do not pass
   an expression with side effects. */
#define ISSPACE(c)  ((c) == ' '  || (c) == '\t' || (c) == '\n' || \
                     (c) == '\v' || (c) == '\f' || (c) == '\r')
#define ISDIGIT(c)  ('0' <= (c) && (c) <= '9')
Martin v. Löwis737ea822004-06-08 18:52:54 +000010
11
/**
 * PyOS_ascii_strtod:
 * @nptr:    the string to convert to a numeric value.
 * @endptr:  if non-%NULL, it returns the character after
 *           the last character used in the conversion.
 *
 * Converts a string to a double value.
 * This function behaves like the standard strtod() function
 * does in the C locale. It does this without actually
 * changing the current locale, since that would not be
 * thread-safe.
 *
 * This function is typically used when reading configuration
 * files or other non-user input that should be locale independent.
 * To handle input from the user you should normally use the
 * locale-sensitive system strtod() function.
 *
 * If the correct value would cause overflow, plus or minus %HUGE_VAL
 * is returned (according to the sign of the value), and %ERANGE is
 * stored in %errno. If the correct value would cause underflow,
 * zero is returned and %ERANGE is stored in %errno.
 * If memory allocation fails, %ENOMEM is stored in %errno.
 *
 * This function resets %errno before calling strtod() so that
 * you can reliably detect overflow and underflow.
 *
 * Return value: the double value.
 **/
Eric Smith0923d1d2009-04-16 20:16:10 +000040
41#ifndef PY_NO_SHORT_FLOAT_REPR
42
43double
44PyOS_ascii_strtod(const char *nptr, char **endptr)
45{
46 double result;
47 _Py_SET_53BIT_PRECISION_HEADER;
48
49 assert(nptr != NULL);
50 /* Set errno to zero, so that we can distinguish zero results
51 and underflows */
52 errno = 0;
53
54 _Py_SET_53BIT_PRECISION_START;
55 result = _Py_dg_strtod(nptr, endptr);
56 _Py_SET_53BIT_PRECISION_END;
57
58 return result;
59
60}
61
62#else
63
64/*
65 Use system strtod; since strtod is locale aware, we may
66 have to first fix the decimal separator.
67
68 Note that unlike _Py_dg_strtod, the system strtod may not always give
69 correctly rounded results.
70*/
71
/* Locale-independent strtod layered on the system strtod().

   nptr   - NUL-terminated input; must not be NULL (asserted).
   endptr - if non-NULL, receives a pointer to the first unconsumed
            character of nptr, or nptr itself when nothing was converted.

   Leading ASCII whitespace and an optional sign are handled here; the
   remainder is handed to strtod().  If the current locale's decimal
   point is not ".", a '.' in the input is rewritten to the locale's
   separator in a temporary copy before calling strtod(), and the
   resulting end position is mapped back onto the caller's string.

   On rejected input (including hex "0x" prefixes and input that uses
   the locale's own separator) errno is set to EINVAL, *endptr to nptr,
   and -1.0 is returned.  If the temporary copy cannot be allocated,
   errno is set to ENOMEM, *endptr to nptr, and -1.0 is returned. */
double
PyOS_ascii_strtod(const char *nptr, char **endptr)
{
    char *fail_pos = NULL;
    double val = -1.0;
    struct lconv *locale_data;
    const char *decimal_point;
    size_t decimal_point_len;
    const char *p;
    const char *decimal_point_pos = NULL;
    const char *end = NULL; /* Silence gcc */
    const char *digits_pos = NULL;
    int negate = 0;

    assert(nptr != NULL);

    locale_data = localeconv();
    decimal_point = locale_data->decimal_point;
    decimal_point_len = strlen(decimal_point);

    assert(decimal_point_len != 0);

    /* We process any leading whitespace and the optional sign manually,
       then pass the remainder to the system strtod.  This ensures that
       the result of an underflow has the correct sign. (bug #1725) */
    p = nptr;

    /* Skip leading space */
    while (ISSPACE(*p))
        p++;

    /* Process leading sign, if present */
    if (*p == '-') {
        negate = 1;
        p++;
    }
    else if (*p == '+') {
        p++;
    }

    /* What's left should begin with a digit, a decimal point, or one of
       the letters i, I, n, N.  It should not begin with 0x or 0X */
    if ((!ISDIGIT(*p) &&
         *p != '.' && *p != 'i' && *p != 'I' && *p != 'n' && *p != 'N')
        ||
        (*p == '0' && (p[1] == 'x' || p[1] == 'X')))
    {
        if (endptr)
            *endptr = (char *)nptr;
        errno = EINVAL;
        return val;
    }
    digits_pos = p;

    if (decimal_point[0] != '.' ||
        decimal_point[1] != 0)
    {
        /* The locale separator is not ".": find the extent of the
           number so that the '.' can be substituted in a copy. */
        while (ISDIGIT(*p))
            p++;

        if (*p == '.') {
            decimal_point_pos = p++;

            while (ISDIGIT(*p))
                p++;

            if (*p == 'e' || *p == 'E')
                p++;
            if (*p == '+' || *p == '-')
                p++;
            while (ISDIGIT(*p))
                p++;
            end = p;
        }
        else if (strncmp(p, decimal_point, decimal_point_len) == 0) {
            /* Python bug #1417699: the locale's own separator must not
               be accepted by this locale-independent function. */
            if (endptr)
                *endptr = (char *)nptr;
            errno = EINVAL;
            return val;
        }
        /* For the other cases, we need not convert the decimal
           point */
    }

    /* Set errno to zero, so that we can distinguish zero results
       and underflows */
    errno = 0;

    if (decimal_point_pos) {
        char *copy, *c;

        /* We need to convert the '.' to the locale specific decimal
           point */
        copy = (char *)PyMem_MALLOC(end - digits_pos +
                                    1 + decimal_point_len);
        if (copy == NULL) {
            if (endptr)
                *endptr = (char *)nptr;
            errno = ENOMEM;
            return val;
        }

        c = copy;
        memcpy(c, digits_pos, decimal_point_pos - digits_pos);
        c += decimal_point_pos - digits_pos;
        memcpy(c, decimal_point, decimal_point_len);
        c += decimal_point_len;
        memcpy(c, decimal_point_pos + 1,
               end - (decimal_point_pos + 1));
        c += end - (decimal_point_pos + 1);
        *c = 0;

        val = strtod(copy, &fail_pos);

        if (fail_pos) {
            /* Map the stop position from 'copy' back onto the caller's
               string.  Compare *offsets*: fail_pos points into 'copy'
               while decimal_point_pos points into nptr, so comparing
               the raw pointers is meaningless and picks the wrong
               branch for multi-byte separators. */
            if (fail_pos - copy > decimal_point_pos - digits_pos)
                /* Stopped after the separator: undo the extra bytes
                   the locale separator occupied in the copy. */
                fail_pos = (char *)digits_pos +
                    (fail_pos - copy) -
                    (decimal_point_len - 1);
            else
                fail_pos = (char *)digits_pos +
                    (fail_pos - copy);
        }

        PyMem_FREE(copy);
    }
    else {
        val = strtod(digits_pos, &fail_pos);
    }

    /* Nothing converted: report the very start of the input, not the
       position after the whitespace/sign we skipped ourselves. */
    if (fail_pos == digits_pos)
        fail_pos = (char *)nptr;

    if (negate && fail_pos != nptr)
        val = -val;

    if (endptr)
        *endptr = fail_pos;

    return val;
}
221
Eric Smith0923d1d2009-04-16 20:16:10 +0000222#endif
223
/* Convenience wrapper: convert nptr with PyOS_ascii_strtod, discarding
   the end-of-parse position. */
double
PyOS_ascii_atof(const char *nptr)
{
    double value = PyOS_ascii_strtod(nptr, NULL);

    return value;
}
229
230
Eric Smithb2c7af82008-04-30 02:12:09 +0000231/* Given a string that may have a decimal point in the current
232 locale, change it back to a dot. Since the string cannot get
233 longer, no need for a maximum buffer size parameter. */
234Py_LOCAL_INLINE(void)
235change_decimal_from_locale_to_dot(char* buffer)
236{
237 struct lconv *locale_data = localeconv();
238 const char *decimal_point = locale_data->decimal_point;
239
240 if (decimal_point[0] != '.' || decimal_point[1] != 0) {
241 size_t decimal_point_len = strlen(decimal_point);
242
243 if (*buffer == '+' || *buffer == '-')
244 buffer++;
245 while (isdigit(Py_CHARMASK(*buffer)))
246 buffer++;
247 if (strncmp(buffer, decimal_point, decimal_point_len) == 0) {
248 *buffer = '.';
249 buffer++;
250 if (decimal_point_len > 1) {
251 /* buffer needs to get smaller */
252 size_t rest_len = strlen(buffer +
253 (decimal_point_len - 1));
254 memmove(buffer,
255 buffer + (decimal_point_len - 1),
256 rest_len);
257 buffer[rest_len] = 0;
258 }
259 }
260 }
261}
262
Martin v. Löwis737ea822004-06-08 18:52:54 +0000263
Christian Heimesc3f30c42008-02-22 16:37:40 +0000264/* From the C99 standard, section 7.19.6:
265The exponent always contains at least two digits, and only as many more digits
266as necessary to represent the exponent.
267*/
268#define MIN_EXPONENT_DIGITS 2
269
Eric Smithb2c7af82008-04-30 02:12:09 +0000270/* Ensure that any exponent, if present, is at least MIN_EXPONENT_DIGITS
271 in length. */
272Py_LOCAL_INLINE(void)
273ensure_minumim_exponent_length(char* buffer, size_t buf_size)
274{
275 char *p = strpbrk(buffer, "eE");
276 if (p && (*(p + 1) == '-' || *(p + 1) == '+')) {
277 char *start = p + 2;
278 int exponent_digit_cnt = 0;
279 int leading_zero_cnt = 0;
280 int in_leading_zeros = 1;
281 int significant_digit_cnt;
282
283 /* Skip over the exponent and the sign. */
284 p += 2;
285
286 /* Find the end of the exponent, keeping track of leading
287 zeros. */
288 while (*p && isdigit(Py_CHARMASK(*p))) {
289 if (in_leading_zeros && *p == '0')
290 ++leading_zero_cnt;
291 if (*p != '0')
292 in_leading_zeros = 0;
293 ++p;
294 ++exponent_digit_cnt;
295 }
296
297 significant_digit_cnt = exponent_digit_cnt - leading_zero_cnt;
298 if (exponent_digit_cnt == MIN_EXPONENT_DIGITS) {
299 /* If there are 2 exactly digits, we're done,
300 regardless of what they contain */
301 }
302 else if (exponent_digit_cnt > MIN_EXPONENT_DIGITS) {
303 int extra_zeros_cnt;
304
305 /* There are more than 2 digits in the exponent. See
306 if we can delete some of the leading zeros */
307 if (significant_digit_cnt < MIN_EXPONENT_DIGITS)
308 significant_digit_cnt = MIN_EXPONENT_DIGITS;
309 extra_zeros_cnt = exponent_digit_cnt -
310 significant_digit_cnt;
311
312 /* Delete extra_zeros_cnt worth of characters from the
313 front of the exponent */
314 assert(extra_zeros_cnt >= 0);
315
316 /* Add one to significant_digit_cnt to copy the
317 trailing 0 byte, thus setting the length */
318 memmove(start,
319 start + extra_zeros_cnt,
320 significant_digit_cnt + 1);
321 }
322 else {
323 /* If there are fewer than 2 digits, add zeros
324 until there are 2, if there's enough room */
325 int zeros = MIN_EXPONENT_DIGITS - exponent_digit_cnt;
326 if (start + zeros + exponent_digit_cnt + 1
327 < buffer + buf_size) {
328 memmove(start + zeros, start,
329 exponent_digit_cnt + 1);
330 memset(start, '0', zeros);
331 }
332 }
333 }
334}
335
Eric Smith0923d1d2009-04-16 20:16:10 +0000336/* Ensure that buffer has a decimal point in it. The decimal point will not
337 be in the current locale, it will always be '.'. Don't add a decimal if an
338 exponent is present. */
Eric Smithb2c7af82008-04-30 02:12:09 +0000339Py_LOCAL_INLINE(void)
340ensure_decimal_point(char* buffer, size_t buf_size)
341{
342 int insert_count = 0;
343 char* chars_to_insert;
344
345 /* search for the first non-digit character */
346 char *p = buffer;
Eric Smith2ad79e82008-07-19 00:33:23 +0000347 if (*p == '-' || *p == '+')
348 /* Skip leading sign, if present. I think this could only
349 ever be '-', but it can't hurt to check for both. */
350 ++p;
Eric Smithb2c7af82008-04-30 02:12:09 +0000351 while (*p && isdigit(Py_CHARMASK(*p)))
352 ++p;
353
354 if (*p == '.') {
355 if (isdigit(Py_CHARMASK(*(p+1)))) {
356 /* Nothing to do, we already have a decimal
357 point and a digit after it */
358 }
359 else {
360 /* We have a decimal point, but no following
361 digit. Insert a zero after the decimal. */
362 ++p;
363 chars_to_insert = "0";
364 insert_count = 1;
365 }
366 }
Eric Smith0923d1d2009-04-16 20:16:10 +0000367 else if (!(*p == 'e' || *p == 'E')) {
368 /* Don't add ".0" if we have an exponent. */
Eric Smithb2c7af82008-04-30 02:12:09 +0000369 chars_to_insert = ".0";
370 insert_count = 2;
371 }
372 if (insert_count) {
373 size_t buf_len = strlen(buffer);
374 if (buf_len + insert_count + 1 >= buf_size) {
375 /* If there is not enough room in the buffer
376 for the additional text, just skip it. It's
377 not worth generating an error over. */
378 }
379 else {
380 memmove(p + insert_count, p,
381 buffer + strlen(buffer) - p + 1);
382 memcpy(p, chars_to_insert, insert_count);
383 }
384 }
385}
386
Christian Heimesc3f30c42008-02-22 16:37:40 +0000387/* see FORMATBUFLEN in unicodeobject.c */
388#define FLOAT_FORMATBUFLEN 120
389
Martin v. Löwis737ea822004-06-08 18:52:54 +0000390/**
391 * PyOS_ascii_formatd:
392 * @buffer: A buffer to place the resulting string in
Christian Heimesb186d002008-03-18 15:15:01 +0000393 * @buf_size: The length of the buffer.
Martin v. Löwis737ea822004-06-08 18:52:54 +0000394 * @format: The printf()-style format to use for the
395 * code to use for converting.
396 * @d: The #gdouble to convert
397 *
398 * Converts a #gdouble to a string, using the '.' as
399 * decimal point. To format the number you pass in
400 * a printf()-style format string. Allowed conversion
Eric Smith0923d1d2009-04-16 20:16:10 +0000401 * specifiers are 'e', 'E', 'f', 'F', 'g', 'G', and 'Z'.
Martin v. Löwis737ea822004-06-08 18:52:54 +0000402 *
Christian Heimesb186d002008-03-18 15:15:01 +0000403 * 'Z' is the same as 'g', except it always has a decimal and
404 * at least one digit after the decimal.
Christian Heimesc3f30c42008-02-22 16:37:40 +0000405 *
Martin v. Löwis737ea822004-06-08 18:52:54 +0000406 * Return value: The pointer to the buffer with the converted string.
407 **/
408char *
409PyOS_ascii_formatd(char *buffer,
Christian Heimesb186d002008-03-18 15:15:01 +0000410 size_t buf_size,
Martin v. Löwis737ea822004-06-08 18:52:54 +0000411 const char *format,
412 double d)
413{
Martin v. Löwis737ea822004-06-08 18:52:54 +0000414 char format_char;
Christian Heimesc3f30c42008-02-22 16:37:40 +0000415 size_t format_len = strlen(format);
416
Christian Heimesb186d002008-03-18 15:15:01 +0000417 /* Issue 2264: code 'Z' requires copying the format. 'Z' is 'g', but
418 also with at least one character past the decimal. */
Christian Heimesc3f30c42008-02-22 16:37:40 +0000419 char tmp_format[FLOAT_FORMATBUFLEN];
Martin v. Löwis737ea822004-06-08 18:52:54 +0000420
Christian Heimesc3f30c42008-02-22 16:37:40 +0000421 /* The last character in the format string must be the format char */
422 format_char = format[format_len - 1];
Martin v. Löwis737ea822004-06-08 18:52:54 +0000423
Martin v. Löwis737ea822004-06-08 18:52:54 +0000424 if (format[0] != '%')
425 return NULL;
426
Christian Heimesc3f30c42008-02-22 16:37:40 +0000427 /* I'm not sure why this test is here. It's ensuring that the format
428 string after the first character doesn't have a single quote, a
429 lowercase l, or a percent. This is the reverse of the commented-out
430 test about 10 lines ago. */
Martin v. Löwis737ea822004-06-08 18:52:54 +0000431 if (strpbrk(format + 1, "'l%"))
432 return NULL;
433
Christian Heimesb186d002008-03-18 15:15:01 +0000434 /* Also curious about this function is that it accepts format strings
435 like "%xg", which are invalid for floats. In general, the
436 interface to this function is not very good, but changing it is
437 difficult because it's a public API. */
438
Martin v. Löwis737ea822004-06-08 18:52:54 +0000439 if (!(format_char == 'e' || format_char == 'E' ||
440 format_char == 'f' || format_char == 'F' ||
Christian Heimesc3f30c42008-02-22 16:37:40 +0000441 format_char == 'g' || format_char == 'G' ||
Eric Smith0923d1d2009-04-16 20:16:10 +0000442 format_char == 'Z'))
Martin v. Löwis737ea822004-06-08 18:52:54 +0000443 return NULL;
444
Eric Smith0923d1d2009-04-16 20:16:10 +0000445 /* Map 'Z' format_char to 'g', by copying the format string and
Christian Heimesb186d002008-03-18 15:15:01 +0000446 replacing the final char with a 'g' */
Eric Smith0923d1d2009-04-16 20:16:10 +0000447 if (format_char == 'Z') {
Christian Heimesc3f30c42008-02-22 16:37:40 +0000448 if (format_len + 1 >= sizeof(tmp_format)) {
449 /* The format won't fit in our copy. Error out. In
Christian Heimesb186d002008-03-18 15:15:01 +0000450 practice, this will never happen and will be
451 detected by returning NULL */
Christian Heimesc3f30c42008-02-22 16:37:40 +0000452 return NULL;
453 }
454 strcpy(tmp_format, format);
455 tmp_format[format_len - 1] = 'g';
456 format = tmp_format;
457 }
Martin v. Löwis737ea822004-06-08 18:52:54 +0000458
Christian Heimesb186d002008-03-18 15:15:01 +0000459
Christian Heimesc3f30c42008-02-22 16:37:40 +0000460 /* Have PyOS_snprintf do the hard work */
Christian Heimesb186d002008-03-18 15:15:01 +0000461 PyOS_snprintf(buffer, buf_size, format, d);
Martin v. Löwis737ea822004-06-08 18:52:54 +0000462
Eric Smithb2c7af82008-04-30 02:12:09 +0000463 /* Do various fixups on the return string */
Martin v. Löwis737ea822004-06-08 18:52:54 +0000464
Eric Smithb2c7af82008-04-30 02:12:09 +0000465 /* Get the current locale, and find the decimal point string.
Eric Smith0923d1d2009-04-16 20:16:10 +0000466 Convert that string back to a dot. */
467 change_decimal_from_locale_to_dot(buffer);
Christian Heimesc3f30c42008-02-22 16:37:40 +0000468
469 /* If an exponent exists, ensure that the exponent is at least
470 MIN_EXPONENT_DIGITS digits, providing the buffer is large enough
471 for the extra zeros. Also, if there are more than
472 MIN_EXPONENT_DIGITS, remove as many zeros as possible until we get
473 back to MIN_EXPONENT_DIGITS */
Eric Smithb2c7af82008-04-30 02:12:09 +0000474 ensure_minumim_exponent_length(buffer, buf_size);
Martin v. Löwis737ea822004-06-08 18:52:54 +0000475
Christian Heimesb186d002008-03-18 15:15:01 +0000476 /* If format_char is 'Z', make sure we have at least one character
477 after the decimal point (and make sure we have a decimal point). */
Eric Smithb2c7af82008-04-30 02:12:09 +0000478 if (format_char == 'Z')
479 ensure_decimal_point(buffer, buf_size);
Christian Heimesb186d002008-03-18 15:15:01 +0000480
Martin v. Löwis737ea822004-06-08 18:52:54 +0000481 return buffer;
482}
483
Eric Smith0923d1d2009-04-16 20:16:10 +0000484#ifdef PY_NO_SHORT_FLOAT_REPR
485
486/* The fallback code to use if _Py_dg_dtoa is not available. */
487
Mark Dickinson3370cce2009-04-17 22:40:53 +0000488/* Remove trailing zeros after the decimal point from a numeric string; also
489 remove the decimal point if all digits following it are zero. The numeric
490 string must end in '\0', and should not have any leading or trailing
491 whitespace. Assumes that the decimal point is '.'. */
492Py_LOCAL_INLINE(void)
493remove_trailing_zeros(char *buffer)
494{
495 char *old_fraction_end, *new_fraction_end, *end, *p;
496
497 p = buffer;
498 if (*p == '-' || *p == '+')
499 /* Skip leading sign, if present */
500 ++p;
501 while (isdigit(Py_CHARMASK(*p)))
502 ++p;
503
504 /* if there's no decimal point there's nothing to do */
505 if (*p++ != '.')
506 return;
507
508 /* scan any digits after the point */
509 while (isdigit(Py_CHARMASK(*p)))
510 ++p;
511 old_fraction_end = p;
512
513 /* scan up to ending '\0' */
514 while (*p != '\0')
515 p++;
516 /* +1 to make sure that we move the null byte as well */
517 end = p+1;
518
519 /* scan back from fraction_end, looking for removable zeros */
520 p = old_fraction_end;
521 while (*(p-1) == '0')
522 --p;
523 /* and remove point if we've got that far */
524 if (*(p-1) == '.')
525 --p;
526 new_fraction_end = p;
527
528 memmove(new_fraction_end, old_fraction_end, end-old_fraction_end);
529}
530
531
Eric Smith0923d1d2009-04-16 20:16:10 +0000532PyAPI_FUNC(char *) PyOS_double_to_string(double val,
533 char format_code,
534 int precision,
535 int flags,
536 int *type)
Martin v. Löwis737ea822004-06-08 18:52:54 +0000537{
Eric Smith0923d1d2009-04-16 20:16:10 +0000538 char buf[128];
539 char format[32];
540 Py_ssize_t len;
541 char *result;
542 char *p;
543 int t;
544 int upper = 0;
Mark Dickinson3370cce2009-04-17 22:40:53 +0000545 int strip_trailing_zeros = 0;
Eric Smith0923d1d2009-04-16 20:16:10 +0000546
547 /* Validate format_code, and map upper and lower case */
548 switch (format_code) {
549 case 'e': /* exponent */
550 case 'f': /* fixed */
551 case 'g': /* general */
552 break;
553 case 'E':
554 upper = 1;
555 format_code = 'e';
556 break;
557 case 'F':
558 upper = 1;
559 format_code = 'f';
560 break;
561 case 'G':
562 upper = 1;
563 format_code = 'g';
564 break;
565 case 'r': /* repr format */
566 /* Supplied precision is unused, must be 0. */
567 if (precision != 0) {
568 PyErr_BadInternalCall();
569 return NULL;
570 }
571 precision = 17;
572 format_code = 'g';
573 break;
574 case 's': /* str format */
575 /* Supplied precision is unused, must be 0. */
576 if (precision != 0) {
577 PyErr_BadInternalCall();
578 return NULL;
579 }
Mark Dickinson3370cce2009-04-17 22:40:53 +0000580 /* switch to exponential notation at 1e11, or 1e12 if we're
581 not adding a .0 */
582 if (fabs(val) >= (flags & Py_DTSF_ADD_DOT_0 ? 1e11 : 1e12)) {
583 precision = 11;
584 format_code = 'e';
585 strip_trailing_zeros = 1;
586 }
587 else {
588 precision = 12;
589 format_code = 'g';
590 }
Eric Smith0923d1d2009-04-16 20:16:10 +0000591 break;
592 default:
593 PyErr_BadInternalCall();
594 return NULL;
595 }
596
597 /* Handle nan and inf. */
598 if (Py_IS_NAN(val)) {
599 strcpy(buf, "nan");
600 t = Py_DTST_NAN;
601 } else if (Py_IS_INFINITY(val)) {
602 if (copysign(1., val) == 1.)
603 strcpy(buf, "inf");
604 else
605 strcpy(buf, "-inf");
606 t = Py_DTST_INFINITE;
607 } else {
608 t = Py_DTST_FINITE;
609
610
Mark Dickinson3370cce2009-04-17 22:40:53 +0000611 if ((flags & Py_DTSF_ADD_DOT_0) && (format_code != 'e'))
Eric Smith0923d1d2009-04-16 20:16:10 +0000612 format_code = 'Z';
613
614 PyOS_snprintf(format, 32, "%%%s.%i%c", (flags & Py_DTSF_ALT ? "#" : ""), precision, format_code);
615 PyOS_ascii_formatd(buf, sizeof(buf), format, val);
Mark Dickinson3370cce2009-04-17 22:40:53 +0000616 /* remove trailing zeros if necessary */
617 if (strip_trailing_zeros)
618 remove_trailing_zeros(buf);
Eric Smith0923d1d2009-04-16 20:16:10 +0000619 }
620
621 len = strlen(buf);
622
623 /* Add 1 for the trailing 0 byte.
624 Add 1 because we might need to make room for the sign.
625 */
626 result = PyMem_Malloc(len + 2);
627 if (result == NULL) {
628 PyErr_NoMemory();
629 return NULL;
630 }
631 p = result;
632
Mark Dickinsonad476da2009-04-23 19:14:16 +0000633 /* Add sign when requested. It's convenient (esp. when formatting
634 complex numbers) to include a sign even for inf and nan. */
635 if (flags & Py_DTSF_SIGN && buf[0] != '-')
Eric Smith0923d1d2009-04-16 20:16:10 +0000636 *p++ = '+';
637
638 strcpy(p, buf);
639
640 if (upper) {
641 /* Convert to upper case. */
642 char *p1;
643 for (p1 = p; *p1; p1++)
644 *p1 = toupper(*p1);
645 }
646
647 if (type)
648 *type = t;
649 return result;
Martin v. Löwis737ea822004-06-08 18:52:54 +0000650}
Eric Smith0923d1d2009-04-16 20:16:10 +0000651
652#else
653
654/* _Py_dg_dtoa is available. */
655
656/* I'm using a lookup table here so that I don't have to invent a non-locale
657 specific way to convert to uppercase */
/* Indices into the float-string tables below. */
enum {
    OFS_INF = 0,
    OFS_NAN = 1,
    OFS_E = 2
};

/* Lower- and upper-case spellings, indexed by the OFS_* constants.
   The lengths of these are known to the code below, so don't change
   them. */
static char *lc_float_strings[] = { "inf", "nan", "e" };
static char *uc_float_strings[] = { "INF", "NAN", "E" };
673
674
675/* Convert a double d to a string, and return a PyMem_Malloc'd block of
676 memory contain the resulting string.
677
678 Arguments:
679 d is the double to be converted
680 format_code is one of 'e', 'f', 'g', 'r' or 's'. 'e', 'f' and 'g'
681 correspond to '%e', '%f' and '%g'; 'r' and 's' correspond
682 to repr and str.
683 mode is one of '0', '2' or '3', and is completely determined by
684 format_code: 'e', 'g' and 's' use mode 2; 'f' mode 3, 'r' mode 0.
685 precision is the desired precision
686 always_add_sign is nonzero if a '+' sign should be included for positive
687 numbers
688 add_dot_0_if_integer is nonzero if integers in non-exponential form
689 should have ".0" added. Only applies to format codes 'r', 's', and 'g'.
690 use_alt_formatting is nonzero if alternative formatting should be
691 used. Only applies to format codes 'e', 'f' and 'g'.
692 type, if non-NULL, will be set to one of these constants to identify
693 the type of the 'd' argument:
694 Py_DTST_FINITE
695 Py_DTST_INFINITE
696 Py_DTST_NAN
697
698 Returns a PyMem_Malloc'd block of memory containing the resulting string,
699 or NULL on error. If NULL is returned, the Python error has been set.
700 */
701
702static char *
703format_float_short(double d, char format_code,
704 int mode, Py_ssize_t precision,
705 int always_add_sign, int add_dot_0_if_integer,
706 int use_alt_formatting, char **float_strings, int *type)
707{
708 char *buf = NULL;
709 char *p = NULL;
710 Py_ssize_t bufsize = 0;
711 char *digits, *digits_end;
712 int decpt_as_int, sign, exp_len, exp = 0, use_exp = 0;
713 Py_ssize_t decpt, digits_len, vdigits_start, vdigits_end;
714 _Py_SET_53BIT_PRECISION_HEADER;
715
716 /* _Py_dg_dtoa returns a digit string (no decimal point or exponent).
717 Must be matched by a call to _Py_dg_freedtoa. */
718 _Py_SET_53BIT_PRECISION_START;
719 digits = _Py_dg_dtoa(d, mode, precision, &decpt_as_int, &sign,
720 &digits_end);
721 _Py_SET_53BIT_PRECISION_END;
722
723 decpt = (Py_ssize_t)decpt_as_int;
724 if (digits == NULL) {
725 /* The only failure mode is no memory. */
726 PyErr_NoMemory();
727 goto exit;
728 }
729 assert(digits_end != NULL && digits_end >= digits);
730 digits_len = digits_end - digits;
731
Mark Dickinson3370cce2009-04-17 22:40:53 +0000732 if (digits_len && !isdigit(Py_CHARMASK(digits[0]))) {
Eric Smith0923d1d2009-04-16 20:16:10 +0000733 /* Infinities and nans here; adapt Gay's output,
734 so convert Infinity to inf and NaN to nan, and
735 ignore sign of nan. Then return. */
736
Mark Dickinsonad476da2009-04-23 19:14:16 +0000737 /* ignore the actual sign of a nan */
738 if (digits[0] == 'n' || digits[0] == 'N')
739 sign = 0;
740
Eric Smith0923d1d2009-04-16 20:16:10 +0000741 /* We only need 5 bytes to hold the result "+inf\0" . */
742 bufsize = 5; /* Used later in an assert. */
743 buf = (char *)PyMem_Malloc(bufsize);
744 if (buf == NULL) {
745 PyErr_NoMemory();
746 goto exit;
747 }
748 p = buf;
749
Mark Dickinsonad476da2009-04-23 19:14:16 +0000750 if (sign == 1) {
751 *p++ = '-';
752 }
753 else if (always_add_sign) {
754 *p++ = '+';
755 }
Eric Smith0923d1d2009-04-16 20:16:10 +0000756 if (digits[0] == 'i' || digits[0] == 'I') {
Eric Smith0923d1d2009-04-16 20:16:10 +0000757 strncpy(p, float_strings[OFS_INF], 3);
758 p += 3;
759
760 if (type)
761 *type = Py_DTST_INFINITE;
762 }
763 else if (digits[0] == 'n' || digits[0] == 'N') {
Eric Smith0923d1d2009-04-16 20:16:10 +0000764 strncpy(p, float_strings[OFS_NAN], 3);
765 p += 3;
766
767 if (type)
768 *type = Py_DTST_NAN;
769 }
770 else {
771 /* shouldn't get here: Gay's code should always return
772 something starting with a digit, an 'I', or 'N' */
773 strncpy(p, "ERR", 3);
774 p += 3;
775 assert(0);
776 }
777 goto exit;
778 }
779
780 /* The result must be finite (not inf or nan). */
781 if (type)
782 *type = Py_DTST_FINITE;
783
784
785 /* We got digits back, format them. We may need to pad 'digits'
786 either on the left or right (or both) with extra zeros, so in
787 general the resulting string has the form
788
789 [<sign>]<zeros><digits><zeros>[<exponent>]
790
791 where either of the <zeros> pieces could be empty, and there's a
792 decimal point that could appear either in <digits> or in the
793 leading or trailing <zeros>.
794
795 Imagine an infinite 'virtual' string vdigits, consisting of the
796 string 'digits' (starting at index 0) padded on both the left and
797 right with infinite strings of zeros. We want to output a slice
798
799 vdigits[vdigits_start : vdigits_end]
800
801 of this virtual string. Thus if vdigits_start < 0 then we'll end
802 up producing some leading zeros; if vdigits_end > digits_len there
803 will be trailing zeros in the output. The next section of code
804 determines whether to use an exponent or not, figures out the
805 position 'decpt' of the decimal point, and computes 'vdigits_start'
806 and 'vdigits_end'. */
807 vdigits_end = digits_len;
808 switch (format_code) {
809 case 'e':
810 use_exp = 1;
811 vdigits_end = precision;
812 break;
813 case 'f':
814 vdigits_end = decpt + precision;
815 break;
816 case 'g':
817 if (decpt <= -4 || decpt > precision)
818 use_exp = 1;
819 if (use_alt_formatting)
820 vdigits_end = precision;
821 break;
822 case 'r':
823 /* convert to exponential format at 1e16. We used to convert
824 at 1e17, but that gives odd-looking results for some values
825 when a 16-digit 'shortest' repr is padded with bogus zeros.
826 For example, repr(2e16+8) would give 20000000000000010.0;
827 the true value is 20000000000000008.0. */
828 if (decpt <= -4 || decpt > 16)
829 use_exp = 1;
830 break;
831 case 's':
832 /* if we're forcing a digit after the point, convert to
833 exponential format at 1e11. If not, convert at 1e12. */
834 if (decpt <= -4 || decpt >
835 (add_dot_0_if_integer ? precision-1 : precision))
836 use_exp = 1;
837 break;
838 default:
839 PyErr_BadInternalCall();
840 goto exit;
841 }
842
843 /* if using an exponent, reset decimal point position to 1 and adjust
844 exponent accordingly.*/
845 if (use_exp) {
846 exp = decpt - 1;
847 decpt = 1;
848 }
849 /* ensure vdigits_start < decpt <= vdigits_end, or vdigits_start <
850 decpt < vdigits_end if add_dot_0_if_integer and no exponent */
851 vdigits_start = decpt <= 0 ? decpt-1 : 0;
852 if (!use_exp && add_dot_0_if_integer)
853 vdigits_end = vdigits_end > decpt ? vdigits_end : decpt + 1;
854 else
855 vdigits_end = vdigits_end > decpt ? vdigits_end : decpt;
856
857 /* double check inequalities */
858 assert(vdigits_start <= 0 &&
859 0 <= digits_len &&
860 digits_len <= vdigits_end);
861 /* decimal point should be in (vdigits_start, vdigits_end] */
862 assert(vdigits_start < decpt && decpt <= vdigits_end);
863
864 /* Compute an upper bound how much memory we need. This might be a few
865 chars too long, but no big deal. */
866 bufsize =
867 /* sign, decimal point and trailing 0 byte */
868 3 +
869
870 /* total digit count (including zero padding on both sides) */
871 (vdigits_end - vdigits_start) +
872
873 /* exponent "e+100", max 3 numerical digits */
874 (use_exp ? 5 : 0);
875
876 /* Now allocate the memory and initialize p to point to the start of
877 it. */
878 buf = (char *)PyMem_Malloc(bufsize);
879 if (buf == NULL) {
880 PyErr_NoMemory();
881 goto exit;
882 }
883 p = buf;
884
885 /* Add a negative sign if negative, and a plus sign if non-negative
886 and always_add_sign is true. */
887 if (sign == 1)
888 *p++ = '-';
889 else if (always_add_sign)
890 *p++ = '+';
891
892 /* note that exactly one of the three 'if' conditions is true,
893 so we include exactly one decimal point */
894 /* Zero padding on left of digit string */
895 if (decpt <= 0) {
896 memset(p, '0', decpt-vdigits_start);
897 p += decpt - vdigits_start;
898 *p++ = '.';
899 memset(p, '0', 0-decpt);
900 p += 0-decpt;
901 }
902 else {
903 memset(p, '0', 0-vdigits_start);
904 p += 0 - vdigits_start;
905 }
906
907 /* Digits, with included decimal point */
908 if (0 < decpt && decpt <= digits_len) {
909 strncpy(p, digits, decpt-0);
910 p += decpt-0;
911 *p++ = '.';
912 strncpy(p, digits+decpt, digits_len-decpt);
913 p += digits_len-decpt;
914 }
915 else {
916 strncpy(p, digits, digits_len);
917 p += digits_len;
918 }
919
920 /* And zeros on the right */
921 if (digits_len < decpt) {
922 memset(p, '0', decpt-digits_len);
923 p += decpt-digits_len;
924 *p++ = '.';
925 memset(p, '0', vdigits_end-decpt);
926 p += vdigits_end-decpt;
927 }
928 else {
929 memset(p, '0', vdigits_end-digits_len);
930 p += vdigits_end-digits_len;
931 }
932
933 /* Delete a trailing decimal pt unless using alternative formatting. */
934 if (p[-1] == '.' && !use_alt_formatting)
935 p--;
936
937 /* Now that we've done zero padding, add an exponent if needed. */
938 if (use_exp) {
939 *p++ = float_strings[OFS_E][0];
940 exp_len = sprintf(p, "%+.02d", exp);
941 p += exp_len;
942 }
943 exit:
944 if (buf) {
945 *p = '\0';
946 /* It's too late if this fails, as we've already stepped on
947 memory that isn't ours. But it's an okay debugging test. */
948 assert(p-buf < bufsize);
949 }
950 if (digits)
951 _Py_dg_freedtoa(digits);
952
953 return buf;
954}
955
956
957PyAPI_FUNC(char *) PyOS_double_to_string(double val,
Eric Smith193125a2009-04-16 22:08:31 +0000958 char format_code,
959 int precision,
960 int flags,
Eric Smith0923d1d2009-04-16 20:16:10 +0000961 int *type)
962{
Eric Smith193125a2009-04-16 22:08:31 +0000963 char **float_strings = lc_float_strings;
964 int mode;
Eric Smith0923d1d2009-04-16 20:16:10 +0000965
Eric Smith193125a2009-04-16 22:08:31 +0000966 /* Validate format_code, and map upper and lower case. Compute the
967 mode and make any adjustments as needed. */
Eric Smith0923d1d2009-04-16 20:16:10 +0000968 switch (format_code) {
Eric Smith193125a2009-04-16 22:08:31 +0000969 /* exponent */
Eric Smith0923d1d2009-04-16 20:16:10 +0000970 case 'E':
Eric Smith0923d1d2009-04-16 20:16:10 +0000971 float_strings = uc_float_strings;
Eric Smith193125a2009-04-16 22:08:31 +0000972 format_code = 'e';
973 /* Fall through. */
Eric Smith0923d1d2009-04-16 20:16:10 +0000974 case 'e':
975 mode = 2;
976 precision++;
977 break;
Eric Smith193125a2009-04-16 22:08:31 +0000978
979 /* fixed */
980 case 'F':
981 float_strings = uc_float_strings;
982 format_code = 'f';
983 /* Fall through. */
Eric Smith0923d1d2009-04-16 20:16:10 +0000984 case 'f':
985 mode = 3;
986 break;
Eric Smith193125a2009-04-16 22:08:31 +0000987
988 /* general */
989 case 'G':
990 float_strings = uc_float_strings;
991 format_code = 'g';
992 /* Fall through. */
Eric Smith0923d1d2009-04-16 20:16:10 +0000993 case 'g':
994 mode = 2;
995 /* precision 0 makes no sense for 'g' format; interpret as 1 */
996 if (precision == 0)
997 precision = 1;
998 break;
Eric Smith193125a2009-04-16 22:08:31 +0000999
1000 /* repr format */
Eric Smith0923d1d2009-04-16 20:16:10 +00001001 case 'r':
Eric Smith0923d1d2009-04-16 20:16:10 +00001002 mode = 0;
1003 /* Supplied precision is unused, must be 0. */
1004 if (precision != 0) {
1005 PyErr_BadInternalCall();
1006 return NULL;
1007 }
1008 break;
Eric Smith193125a2009-04-16 22:08:31 +00001009
1010 /* str format */
Eric Smith0923d1d2009-04-16 20:16:10 +00001011 case 's':
1012 mode = 2;
1013 /* Supplied precision is unused, must be 0. */
1014 if (precision != 0) {
1015 PyErr_BadInternalCall();
1016 return NULL;
1017 }
1018 precision = 12;
1019 break;
Eric Smith193125a2009-04-16 22:08:31 +00001020
1021 default:
1022 PyErr_BadInternalCall();
1023 return NULL;
Eric Smith0923d1d2009-04-16 20:16:10 +00001024 }
1025
Eric Smith193125a2009-04-16 22:08:31 +00001026 return format_float_short(val, format_code, mode, precision,
Eric Smith0923d1d2009-04-16 20:16:10 +00001027 flags & Py_DTSF_SIGN,
1028 flags & Py_DTSF_ADD_DOT_0,
1029 flags & Py_DTSF_ALT,
1030 float_strings, type);
1031}
1032#endif /* ifdef PY_NO_SHORT_FLOAT_REPR */