blob: e68c46086fa2d480314de774edac8c53ef619aed [file] [log] [blame]
Martin v. Löwis737ea822004-06-08 18:52:54 +00001/* -*- Mode: C; c-file-style: "python" -*- */
2
3#include <Python.h>
4#include <locale.h>
5
/* Locale-independent (pure ASCII) character classification.  Unlike the
   <ctype.h> functions these never consult the current locale.  Each macro
   may evaluate its argument more than once, so pass side-effect-free
   expressions only. */
#define ISSPACE(c)  ((c) == '\t' || (c) == '\n' || (c) == '\v' || \
                     (c) == '\f' || (c) == '\r' || (c) == ' ')
#define ISDIGIT(c)  ((c) <= '9' && (c) >= '0')
#define ISXDIGIT(c) (((c) >= 'A' && (c) <= 'F') || \
                     ((c) >= 'a' && (c) <= 'f') || \
                     ISDIGIT(c))
11
12
/**
 * PyOS_ascii_strtod:
 * @nptr:    the string to convert to a numeric value.
 * @endptr:  if non-%NULL, it returns the character after
 *           the last character used in the conversion.
 *
 * Converts a string to a double value.
 * This function behaves like the standard strtod() function
 * does in the C locale.  It does this without actually
 * changing the current locale, since that would not be
 * thread-safe.
 *
 * This function is typically used when reading configuration
 * files or other non-user input that should be locale independent.
 * To handle input from the user you should normally use the
 * locale-sensitive system strtod() function.
 *
 * If the correct value would cause overflow, plus or minus %HUGE_VAL
 * is returned (according to the sign of the value), and %ERANGE is
 * stored in %errno.  If the correct value would cause underflow,
 * zero is returned and %ERANGE is stored in %errno.
 *
 * If the text after optional whitespace and sign does not start with a
 * digit, '.', or one of the letters i, I, n, N, or if it starts with
 * "0x"/"0X", or if the current locale's decimal point (when it is not
 * ".") directly follows the leading digits, then -1.0 is returned,
 * %EINVAL is stored in %errno and *@endptr (if requested) is set to @nptr.
 * If memory allocation fails, -1.0 is returned, %ENOMEM is stored in
 * %errno and *@endptr (if requested) is set to @nptr.
 *
 * This function resets %errno before calling strtod() so that
 * you can reliably detect overflow and underflow.
 *
 * Return value: the double value.
 **/
double
PyOS_ascii_strtod(const char *nptr, char **endptr)
{
    char *fail_pos = NULL;
    double val = -1.0;          /* returned as-is on every error path */
    struct lconv *locale_data;
    const char *decimal_point;
    size_t decimal_point_len;
    const char *p;
    const char *decimal_point_pos = NULL;
    const char *end = NULL; /* Silence gcc */
    const char *digits_pos = NULL;
    int negate = 0;

    assert(nptr != NULL);

    locale_data = localeconv();
    decimal_point = locale_data->decimal_point;
    decimal_point_len = strlen(decimal_point);

    assert(decimal_point_len != 0);

    /* We process any leading whitespace and the optional sign manually,
       then pass the remainder to the system strtod.  This ensures that
       the result of an underflow has the correct sign. (bug #1725) */

    p = nptr;
    /* Skip leading space */
    while (ISSPACE(*p))
        p++;

    /* Process leading sign, if present */
    if (*p == '-') {
        negate = 1;
        p++;
    }
    else if (*p == '+') {
        p++;
    }

    /* What's left should begin with a digit, a decimal point, or one of
       the letters i, I, n, N. It should not begin with 0x or 0X */
    if ((!ISDIGIT(*p) &&
         *p != '.' && *p != 'i' && *p != 'I' && *p != 'n' && *p != 'N')
        ||
        (*p == '0' && (p[1] == 'x' || p[1] == 'X')))
    {
        if (endptr)
            *endptr = (char *)nptr;
        errno = EINVAL;
        return val;
    }
    digits_pos = p;

    /* Only when the locale's decimal point is something other than "."
       do we have to locate the '.' in the input so it can be rewritten
       into what the system strtod() expects. */
    if (decimal_point[0] != '.' ||
        decimal_point[1] != 0)
    {
        while (ISDIGIT(*p))
            p++;

        if (*p == '.')
        {
            decimal_point_pos = p++;

            while (ISDIGIT(*p))
                p++;

            /* Loosely scan an optional exponent; this only has to find an
               upper bound for the text to copy, strtod() does the real
               parsing and reports where it actually stopped. */
            if (*p == 'e' || *p == 'E')
                p++;
            if (*p == '+' || *p == '-')
                p++;
            while (ISDIGIT(*p))
                p++;
            end = p;
        }
        else if (strncmp(p, decimal_point, decimal_point_len) == 0)
        {
            /* Python bug #1417699: the locale's decimal point must not
               be accepted as a decimal point by this function. */
            if (endptr)
                *endptr = (char *)nptr;
            errno = EINVAL;
            return val;
        }
        /* For the other cases, we need not convert the decimal point */
    }

    /* Set errno to zero, so that we can distinguish zero results
       and underflows */
    errno = 0;

    if (decimal_point_pos)
    {
        char *copy, *c;

        /* We need to convert the '.' to the locale specific decimal point */
        copy = (char *)PyMem_MALLOC(end - digits_pos +
                                    1 + decimal_point_len);
        if (copy == NULL) {
            if (endptr)
                *endptr = (char *)nptr;
            errno = ENOMEM;
            return val;
        }

        /* copy = <digits before '.'> <locale decimal point> <rest> NUL */
        c = copy;
        memcpy(c, digits_pos, decimal_point_pos - digits_pos);
        c += decimal_point_pos - digits_pos;
        memcpy(c, decimal_point, decimal_point_len);
        c += decimal_point_len;
        memcpy(c, decimal_point_pos + 1, end - (decimal_point_pos + 1));
        c += end - (decimal_point_pos + 1);
        *c = 0;

        val = strtod(copy, &fail_pos);

        if (fail_pos)
        {
            /* Translate the stop position from `copy` back into the
               caller's string.  Compare *offsets*: fail_pos points into
               `copy` while decimal_point_pos points into nptr, so the two
               pointers themselves must never be compared.  If strtod()
               went past the decimal point, compensate for the locale
               decimal point being (decimal_point_len - 1) bytes longer
               than the single '.' in the input. */
            if (fail_pos - copy > decimal_point_pos - digits_pos)
                fail_pos = (char *)digits_pos +
                    (fail_pos - copy) -
                    (decimal_point_len - 1);
            else
                fail_pos = (char *)digits_pos +
                    (fail_pos - copy);
        }

        PyMem_FREE(copy);
    }
    else {
        val = strtod(digits_pos, &fail_pos);
    }

    /* Nothing converted: report the very start of the input, not the
       position after the whitespace/sign we skipped ourselves. */
    if (fail_pos == digits_pos)
        fail_pos = (char *)nptr;

    if (negate && fail_pos != nptr)
        val = -val;

    if (endptr)
        *endptr = fail_pos;

    return val;
}
187
188
/* From the C99 standard, section 7.19.6:
   "The exponent always contains at least two digits, and only as many more
   digits as necessary to represent the exponent."
   PyOS_ascii_formatd() normalizes exponents to this minimum width. */
#define MIN_EXPONENT_DIGITS 2

/* Size of the scratch copy of the format string made by
   PyOS_ascii_formatd(); see FORMATBUFLEN in unicodeobject.c */
#define FLOAT_FORMATBUFLEN 120
197
Martin v. Löwis737ea822004-06-08 18:52:54 +0000198/**
199 * PyOS_ascii_formatd:
200 * @buffer: A buffer to place the resulting string in
Eric Smith8113ca62008-03-17 11:01:01 +0000201 * @buf_size: The length of the buffer.
Martin v. Löwis737ea822004-06-08 18:52:54 +0000202 * @format: The printf()-style format to use for the
203 * code to use for converting.
204 * @d: The #gdouble to convert
205 *
206 * Converts a #gdouble to a string, using the '.' as
207 * decimal point. To format the number you pass in
208 * a printf()-style format string. Allowed conversion
Eric Smith7ef40bf2008-02-20 23:34:22 +0000209 * specifiers are 'e', 'E', 'f', 'F', 'g', 'G', and 'n'.
Martin v. Löwis737ea822004-06-08 18:52:54 +0000210 *
Eric Smith7ef40bf2008-02-20 23:34:22 +0000211 * 'n' is the same as 'g', except it uses the current locale.
Eric Smith8113ca62008-03-17 11:01:01 +0000212 * 'Z' is the same as 'g', except it always has a decimal and
213 * at least one digit after the decimal.
Eric Smith7ef40bf2008-02-20 23:34:22 +0000214 *
Martin v. Löwis737ea822004-06-08 18:52:54 +0000215 * Return value: The pointer to the buffer with the converted string.
216 **/
217char *
218PyOS_ascii_formatd(char *buffer,
Eric Smith8113ca62008-03-17 11:01:01 +0000219 size_t buf_size,
Martin v. Löwis737ea822004-06-08 18:52:54 +0000220 const char *format,
221 double d)
222{
Martin v. Löwis737ea822004-06-08 18:52:54 +0000223 char *p;
Martin v. Löwis737ea822004-06-08 18:52:54 +0000224 char format_char;
Eric Smith7ef40bf2008-02-20 23:34:22 +0000225 size_t format_len = strlen(format);
226
227 /* For type 'n', we need to make a copy of the format string, because
228 we're going to modify 'n' -> 'g', and format is const char*, so we
229 can't modify it directly. FLOAT_FORMATBUFLEN should be longer than
230 we ever need this to be. There's an upcoming check to ensure it's
231 big enough. */
Eric Smith8113ca62008-03-17 11:01:01 +0000232 /* Issue 2264: code 'Z' requires copying the format. 'Z' is 'g', but
233 also with at least one character past the decimal. */
Eric Smith7ef40bf2008-02-20 23:34:22 +0000234 char tmp_format[FLOAT_FORMATBUFLEN];
Martin v. Löwis737ea822004-06-08 18:52:54 +0000235
Eric Smith7ef40bf2008-02-20 23:34:22 +0000236 /* The last character in the format string must be the format char */
237 format_char = format[format_len - 1];
Martin v. Löwis737ea822004-06-08 18:52:54 +0000238
Martin v. Löwis737ea822004-06-08 18:52:54 +0000239 if (format[0] != '%')
240 return NULL;
241
Eric Smith7ef40bf2008-02-20 23:34:22 +0000242 /* I'm not sure why this test is here. It's ensuring that the format
243 string after the first character doesn't have a single quote, a
244 lowercase l, or a percent. This is the reverse of the commented-out
245 test about 10 lines ago. */
Martin v. Löwis737ea822004-06-08 18:52:54 +0000246 if (strpbrk(format + 1, "'l%"))
247 return NULL;
248
Eric Smith8113ca62008-03-17 11:01:01 +0000249 /* Also curious about this function is that it accepts format strings
250 like "%xg", which are invalid for floats. In general, the
251 interface to this function is not very good, but changing it is
252 difficult because it's a public API. */
253
Martin v. Löwis737ea822004-06-08 18:52:54 +0000254 if (!(format_char == 'e' || format_char == 'E' ||
255 format_char == 'f' || format_char == 'F' ||
Eric Smith7ef40bf2008-02-20 23:34:22 +0000256 format_char == 'g' || format_char == 'G' ||
Eric Smith8113ca62008-03-17 11:01:01 +0000257 format_char == 'n' || format_char == 'Z'))
Martin v. Löwis737ea822004-06-08 18:52:54 +0000258 return NULL;
259
Eric Smith8113ca62008-03-17 11:01:01 +0000260 /* Map 'n' or 'Z' format_char to 'g', by copying the format string and
261 replacing the final char with a 'g' */
262 if (format_char == 'n' || format_char == 'Z') {
Eric Smith7ef40bf2008-02-20 23:34:22 +0000263 if (format_len + 1 >= sizeof(tmp_format)) {
264 /* The format won't fit in our copy. Error out. In
265 practice, this will never happen and will be detected
266 by returning NULL */
267 return NULL;
268 }
269 strcpy(tmp_format, format);
270 tmp_format[format_len - 1] = 'g';
271 format = tmp_format;
272 }
Martin v. Löwis737ea822004-06-08 18:52:54 +0000273
Eric Smith8113ca62008-03-17 11:01:01 +0000274
Eric Smith7ef40bf2008-02-20 23:34:22 +0000275 /* Have PyOS_snprintf do the hard work */
Eric Smith8113ca62008-03-17 11:01:01 +0000276 PyOS_snprintf(buffer, buf_size, format, d);
Martin v. Löwis737ea822004-06-08 18:52:54 +0000277
Eric Smith7ef40bf2008-02-20 23:34:22 +0000278 /* Get the current local, and find the decimal point character (or
279 string?). Convert that string back to a dot. Do not do this if
280 using the 'n' (number) format code. */
281 if (format_char != 'n') {
282 struct lconv *locale_data = localeconv();
283 const char *decimal_point = locale_data->decimal_point;
284 size_t decimal_point_len = strlen(decimal_point);
285 size_t rest_len;
Martin v. Löwis737ea822004-06-08 18:52:54 +0000286
Eric Smith7ef40bf2008-02-20 23:34:22 +0000287 assert(decimal_point_len != 0);
Martin v. Löwis737ea822004-06-08 18:52:54 +0000288
Eric Smith7ef40bf2008-02-20 23:34:22 +0000289 if (decimal_point[0] != '.' || decimal_point[1] != 0) {
290 p = buffer;
Martin v. Löwis737ea822004-06-08 18:52:54 +0000291
Eric Smith7ef40bf2008-02-20 23:34:22 +0000292 if (*p == '+' || *p == '-')
293 p++;
Martin v. Löwis737ea822004-06-08 18:52:54 +0000294
Eric Smith7ef40bf2008-02-20 23:34:22 +0000295 while (isdigit(Py_CHARMASK(*p)))
296 p++;
Martin v. Löwis737ea822004-06-08 18:52:54 +0000297
Eric Smith7ef40bf2008-02-20 23:34:22 +0000298 if (strncmp(p, decimal_point, decimal_point_len) == 0) {
299 *p = '.';
300 p++;
301 if (decimal_point_len > 1) {
302 rest_len = strlen(p +
303 (decimal_point_len - 1));
304 memmove(p, p + (decimal_point_len - 1),
305 rest_len);
306 p[rest_len] = 0;
307 }
308 }
309 }
310 }
311
312 /* If an exponent exists, ensure that the exponent is at least
313 MIN_EXPONENT_DIGITS digits, providing the buffer is large enough
314 for the extra zeros. Also, if there are more than
315 MIN_EXPONENT_DIGITS, remove as many zeros as possible until we get
316 back to MIN_EXPONENT_DIGITS */
317 p = strpbrk(buffer, "eE");
318 if (p && (*(p + 1) == '-' || *(p + 1) == '+')) {
319 char *start = p + 2;
320 int exponent_digit_cnt = 0;
321 int leading_zero_cnt = 0;
322 int in_leading_zeros = 1;
323 int significant_digit_cnt;
324
325 p += 2;
326 while (*p && isdigit(Py_CHARMASK(*p))) {
327 if (in_leading_zeros && *p == '0')
328 ++leading_zero_cnt;
329 if (*p != '0')
330 in_leading_zeros = 0;
331 ++p;
332 ++exponent_digit_cnt;
333 }
334
335 significant_digit_cnt = exponent_digit_cnt - leading_zero_cnt;
336 if (exponent_digit_cnt == MIN_EXPONENT_DIGITS) {
337 /* If there are 2 exactly digits, we're done,
338 regardless of what they contain */
339 }
340 else if (exponent_digit_cnt > MIN_EXPONENT_DIGITS) {
341 int extra_zeros_cnt;
342
343 /* There are more than 2 digits in the exponent. See
344 if we can delete some of the leading zeros */
345 if (significant_digit_cnt < MIN_EXPONENT_DIGITS)
346 significant_digit_cnt = MIN_EXPONENT_DIGITS;
347 extra_zeros_cnt = exponent_digit_cnt - significant_digit_cnt;
348
349 /* Delete extra_zeros_cnt worth of characters from the
350 front of the exponent */
351 assert(extra_zeros_cnt >= 0);
352
353 /* Add one to significant_digit_cnt to copy the
354 trailing 0 byte, thus setting the length */
355 memmove(start,
356 start + extra_zeros_cnt,
357 significant_digit_cnt + 1);
358 }
359 else {
360 /* If there are fewer than 2 digits, add zeros
361 until there are 2, if there's enough room */
362 int zeros = MIN_EXPONENT_DIGITS - exponent_digit_cnt;
363 if (start + zeros + exponent_digit_cnt + 1
Eric Smith8113ca62008-03-17 11:01:01 +0000364 < buffer + buf_size) {
Eric Smith7ef40bf2008-02-20 23:34:22 +0000365 memmove(start + zeros, start,
366 exponent_digit_cnt + 1);
367 memset(start, '0', zeros);
Martin v. Löwis737ea822004-06-08 18:52:54 +0000368 }
369 }
370 }
371
Eric Smith8113ca62008-03-17 11:01:01 +0000372 /* If format_char is 'Z', make sure we have at least one character
373 after the decimal point (and make sure we have a decimal point). */
374 if (format_char == 'Z') {
375 int insert_count = 0;
376 char* chars_to_insert;
377
378 /* search for the first non-digit character */
379 p = buffer;
380 while (*p && isdigit(Py_CHARMASK(*p)))
381 ++p;
382
383 if (*p == '.') {
384 if (isdigit(Py_CHARMASK(*(p+1)))) {
385 /* Nothing to do, we already have a decimal
386 point and a digit after it */
387 }
388 else {
389 /* We have a decimal point, but no following
390 digit. Insert a zero after the decimal. */
391 ++p;
392 chars_to_insert = "0";
393 insert_count = 1;
394 }
395 }
396 else {
397 chars_to_insert = ".0";
398 insert_count = 2;
399 }
400 if (insert_count) {
401 size_t buf_len = strlen(buffer);
402 if (buf_len + insert_count + 1 >= buf_size) {
403 /* If there is not enough room in the buffer
404 for the additional text, just skip it. It's
405 not worth generating an error over. */
406 }
407 else {
408 memmove(p + insert_count, p,
409 buffer + strlen(buffer) - p + 1);
410 memcpy(p, chars_to_insert, insert_count);
411 }
412 }
413 }
414
Martin v. Löwis737ea822004-06-08 18:52:54 +0000415 return buffer;
416}
417
418double
419PyOS_ascii_atof(const char *nptr)
420{
421 return PyOS_ascii_strtod(nptr, NULL);
422}