blob: 16efa9d3ab8c2987d56a0c6436316ab3ee121e09 [file] [log] [blame]
Martin v. Löwis737ea822004-06-08 18:52:54 +00001/* -*- Mode: C; c-file-style: "python" -*- */
2
3#include <Python.h>
4#include <locale.h>
5
/* Locale-independent character classification.  Unlike the <ctype.h>
   functions these never consult the current locale; they compare the
   argument against fixed character literals only.  Each macro may
   evaluate its argument more than once, so pass side-effect-free
   expressions. */
#define ISSPACE(c)  ((c) == '\t' || (c) == '\n' || (c) == '\v' || \
                     (c) == '\f' || (c) == '\r' || (c) == ' ')
#define ISDIGIT(c)  ('0' <= (c) && (c) <= '9')
#define ISXDIGIT(c) (('A' <= (c) && (c) <= 'F') || \
                     ('a' <= (c) && (c) <= 'f') || \
                     ISDIGIT(c))
11
12
/**
 * PyOS_ascii_strtod:
 * @nptr:    the string to convert to a numeric value.  Must not be NULL.
 * @endptr:  if non-NULL, it receives a pointer to the character after
 *           the last character used in the conversion.
 *
 * Converts a string to a double value.
 * This function behaves like the standard strtod() function
 * does in the C locale.  It does this without actually
 * changing the current locale, since that would not be
 * thread-safe.
 *
 * This function is typically used when reading configuration
 * files or other non-user input that should be locale independent.
 * To handle input from the user you should normally use the
 * locale-sensitive system strtod() function.
 *
 * Leading whitespace and an optional sign are consumed here; the rest
 * is handed to the system strtod().  Input whose remainder does not
 * start with a digit, '.', 'i', 'I', 'n' or 'N', or that starts with
 * "0x"/"0X", is rejected.
 *
 * If the correct value would cause overflow, plus or minus HUGE_VAL
 * is returned (according to the sign of the value), and ERANGE is
 * stored in errno.  If the correct value would cause underflow,
 * zero is returned and ERANGE is stored in errno.
 * If the input is rejected, EINVAL is stored in errno, *endptr is set
 * to nptr and -1.0 is returned.
 * If memory allocation fails, ENOMEM is stored in errno, *endptr is
 * set to nptr and -1.0 is returned.
 *
 * This function resets errno before calling strtod() so that
 * you can reliably detect overflow and underflow.
 *
 * Return value: the double value.
 **/
double
PyOS_ascii_strtod(const char *nptr, char **endptr)
{
    char *fail_pos;
    double val = -1.0;
    struct lconv *locale_data;
    const char *decimal_point;
    size_t decimal_point_len;
    const char *p, *decimal_point_pos;
    const char *end = NULL; /* Silence gcc */
    const char *digits_pos = NULL;
    int negate = 0;

    assert(nptr != NULL);

    fail_pos = NULL;

    locale_data = localeconv();
    decimal_point = locale_data->decimal_point;
    decimal_point_len = strlen(decimal_point);

    assert(decimal_point_len != 0);

    decimal_point_pos = NULL;

    /* We process any leading whitespace and the optional sign manually,
       then pass the remainder to the system strtod.  This ensures that
       the result of an underflow has the correct sign. (bug #1725) */

    p = nptr;
    /* Skip leading space */
    while (ISSPACE(*p))
        p++;

    /* Process leading sign, if present */
    if (*p == '-') {
        negate = 1;
        p++;
    }
    else if (*p == '+') {
        p++;
    }

    /* What's left should begin with a digit, a decimal point, or one of
       the letters i, I, n, N. It should not begin with 0x or 0X */
    if ((!ISDIGIT(*p) &&
         *p != '.' && *p != 'i' && *p != 'I' && *p != 'n' && *p != 'N')
        ||
        (*p == '0' && (p[1] == 'x' || p[1] == 'X')))
    {
        if (endptr)
            *endptr = (char *)nptr;
        errno = EINVAL;
        return val;
    }
    digits_pos = p;

    if (decimal_point[0] != '.' ||
        decimal_point[1] != 0)
    {
        /* The locale's decimal point is not ".": find the extent of the
           number so that the '.' can be replaced in a private copy. */
        while (ISDIGIT(*p))
            p++;

        if (*p == '.')
        {
            decimal_point_pos = p++;

            while (ISDIGIT(*p))
                p++;

            /* Scan a possible exponent.  This may step over a sign that
               is not preceded by 'e'/'E'; that only makes the copy a
               little longer, strtod() still decides where the number
               really ends. */
            if (*p == 'e' || *p == 'E')
                p++;
            if (*p == '+' || *p == '-')
                p++;
            while (ISDIGIT(*p))
                p++;
            end = p;
        }
        else if (strncmp(p, decimal_point, decimal_point_len) == 0)
        {
            /* Python bug #1417699: the locale's own decimal point must
               not be accepted by a locale-independent parser. */
            if (endptr)
                *endptr = (char *)nptr;
            errno = EINVAL;
            return val;
        }
        /* For the other cases, we need not convert the decimal point */
    }

    /* Set errno to zero, so that we can distinguish zero results
       and underflows */
    errno = 0;

    if (decimal_point_pos)
    {
        char *copy, *c;

        /* We need to convert the '.' to the locale specific decimal point */
        copy = (char *)PyMem_MALLOC(end - digits_pos +
                                    1 + decimal_point_len);
        if (copy == NULL) {
            if (endptr)
                *endptr = (char *)nptr;
            errno = ENOMEM;
            return val;
        }

        c = copy;
        memcpy(c, digits_pos, decimal_point_pos - digits_pos);
        c += decimal_point_pos - digits_pos;
        memcpy(c, decimal_point, decimal_point_len);
        c += decimal_point_len;
        memcpy(c, decimal_point_pos + 1, end - (decimal_point_pos + 1));
        c += end - (decimal_point_pos + 1);
        *c = 0;

        val = strtod(copy, &fail_pos);

        if (fail_pos)
        {
            /* Translate the stop position from the copy back into the
               caller's string.  fail_pos points into copy while
               decimal_point_pos points into the input, so the two must
               be compared as offsets from their respective starts, never
               as raw pointers.  Past the decimal point the copy is
               (decimal_point_len - 1) bytes longer than the input. */
            if (fail_pos - copy > decimal_point_pos - digits_pos)
                fail_pos = (char *)digits_pos +
                    (fail_pos - copy) -
                    (decimal_point_len - 1);
            else
                fail_pos = (char *)digits_pos +
                    (fail_pos - copy);
        }

        PyMem_FREE(copy);

    }
    else {
        val = strtod(digits_pos, &fail_pos);
    }

    /* Nothing was converted: report the original start of the string,
       not the position after the whitespace and sign we skipped. */
    if (fail_pos == digits_pos)
        fail_pos = (char *)nptr;

    if (negate && fail_pos != nptr)
        val = -val;

    if (endptr)
        *endptr = fail_pos;

    return val;
}
187
188
/* From the C99 standard, section 7.19.6:
The exponent always contains at least two digits, and only as many more digits
as necessary to represent the exponent.
*/
#define MIN_EXPONENT_DIGITS 2

/* see FORMATBUFLEN in unicodeobject.c */
#define FLOAT_FORMATBUFLEN 120

/**
 * PyOS_ascii_formatd:
 * @buffer:  A buffer to place the resulting string in
 * @buf_len: The length of the buffer.
 * @format:  The printf()-style format to use for the
 *           code to use for converting.
 * @d:       The double to convert
 *
 * Converts a double to a string, using the '.' as
 * decimal point. To format the number you pass in
 * a printf()-style format string. Allowed conversion
 * specifiers are 'e', 'E', 'f', 'F', 'g', 'G', and 'n'.
 *
 * 'n' is the same as 'g', except it uses the current locale.
 *
 * The exponent, if any, is normalised to at least MIN_EXPONENT_DIGITS
 * digits with surplus leading zeros removed.
 *
 * NULL is returned, and buffer is left untouched, when the format is
 * rejected: it is empty or does not start with '%', it contains a
 * single quote, a lowercase 'l' or a second '%', its last character is
 * not one of the allowed conversion specifiers, or (for 'n') it is too
 * long for the internal copy.
 *
 * Return value: The pointer to the buffer with the converted string.
 **/
char *
PyOS_ascii_formatd(char *buffer,
                   size_t buf_len,
                   const char *format,
                   double d)
{
    char *p;
    char format_char;
    size_t format_len = strlen(format);

    /* For type 'n', we need to make a copy of the format string, because
       we're going to modify 'n' -> 'g', and format is const char*, so we
       can't modify it directly.  FLOAT_FORMATBUFLEN should be longer than
       we ever need this to be.  There's an upcoming check to ensure it's
       big enough. */
    char tmp_format[FLOAT_FORMATBUFLEN];

    /* Validate the leading '%' before looking at the last character.
       This also rejects an empty format, for which format_len - 1 would
       wrap around and index far outside the string. */
    if (format[0] != '%')
        return NULL;

    /* The last character in the format string must be the format char */
    format_char = format[format_len - 1];

    /* Reject formats that contain a single quote, a lowercase l or
       another percent sign after the leading '%'.
       NOTE(review): presumably because ' asks printf for locale
       grouping and a second % would start another conversion -- confirm
       the original intent. */
    if (strpbrk(format + 1, "'l%"))
        return NULL;

    if (!(format_char == 'e' || format_char == 'E' ||
          format_char == 'f' || format_char == 'F' ||
          format_char == 'g' || format_char == 'G' ||
          format_char == 'n'))
        return NULL;

    /* Map 'n' format_char to 'g', by copying the format string and
       replacing the final 'n' with a 'g' */
    if (format_char == 'n') {
        if (format_len + 1 >= sizeof(tmp_format)) {
            /* The format won't fit in our copy.  Error out.  In
               practice, this will never happen and will be detected
               by returning NULL */
            return NULL;
        }
        strcpy(tmp_format, format);
        tmp_format[format_len - 1] = 'g';
        format = tmp_format;
    }

    /* Have PyOS_snprintf do the hard work */
    PyOS_snprintf(buffer, buf_len, format, d);

    /* Get the current locale, and find the decimal point character (or
       string?).  Convert that string back to a dot.  Do not do this if
       using the 'n' (number) format code. */
    if (format_char != 'n') {
        struct lconv *locale_data = localeconv();
        const char *decimal_point = locale_data->decimal_point;
        size_t decimal_point_len = strlen(decimal_point);
        size_t rest_len;

        assert(decimal_point_len != 0);

        if (decimal_point[0] != '.' || decimal_point[1] != 0) {
            p = buffer;

            /* A field width or the ' ' flag makes printf emit leading
               spaces; step over them so the decimal point is still
               found in padded output. */
            while (*p == ' ')
                p++;

            if (*p == '+' || *p == '-')
                p++;

            while (isdigit(Py_CHARMASK(*p)))
                p++;

            if (strncmp(p, decimal_point, decimal_point_len) == 0) {
                *p = '.';
                p++;
                if (decimal_point_len > 1) {
                    /* Close the gap left by a multi-byte decimal point */
                    rest_len = strlen(p +
                                      (decimal_point_len - 1));
                    memmove(p, p + (decimal_point_len - 1),
                            rest_len);
                    p[rest_len] = 0;
                }
            }
        }
    }

    /* If an exponent exists, ensure that the exponent is at least
       MIN_EXPONENT_DIGITS digits, providing the buffer is large enough
       for the extra zeros.  Also, if there are more than
       MIN_EXPONENT_DIGITS, remove as many zeros as possible until we get
       back to MIN_EXPONENT_DIGITS */
    p = strpbrk(buffer, "eE");
    if (p && (*(p + 1) == '-' || *(p + 1) == '+')) {
        char *start = p + 2;
        int exponent_digit_cnt = 0;
        int leading_zero_cnt = 0;
        int in_leading_zeros = 1;
        int significant_digit_cnt;

        /* Count the exponent digits and how many of them are leading
           zeros */
        p += 2;
        while (*p && isdigit(Py_CHARMASK(*p))) {
            if (in_leading_zeros && *p == '0')
                ++leading_zero_cnt;
            if (*p != '0')
                in_leading_zeros = 0;
            ++p;
            ++exponent_digit_cnt;
        }

        significant_digit_cnt = exponent_digit_cnt - leading_zero_cnt;
        if (exponent_digit_cnt == MIN_EXPONENT_DIGITS) {
            /* If there are exactly 2 digits, we're done,
               regardless of what they contain */
        }
        else if (exponent_digit_cnt > MIN_EXPONENT_DIGITS) {
            int extra_zeros_cnt;

            /* There are more than 2 digits in the exponent.  See
               if we can delete some of the leading zeros */
            if (significant_digit_cnt < MIN_EXPONENT_DIGITS)
                significant_digit_cnt = MIN_EXPONENT_DIGITS;
            extra_zeros_cnt = exponent_digit_cnt - significant_digit_cnt;

            /* Delete extra_zeros_cnt worth of characters from the
               front of the exponent */
            assert(extra_zeros_cnt >= 0);

            /* Add one to significant_digit_cnt to copy the
               trailing 0 byte, thus setting the length */
            memmove(start,
                    start + extra_zeros_cnt,
                    significant_digit_cnt + 1);
        }
        else {
            /* If there are fewer than 2 digits, add zeros
               until there are 2, if there's enough room */
            int zeros = MIN_EXPONENT_DIGITS - exponent_digit_cnt;
            if (start + zeros + exponent_digit_cnt + 1
                < buffer + buf_len) {
                memmove(start + zeros, start,
                        exponent_digit_cnt + 1);
                memset(start, '0', zeros);
            }
        }
    }

    return buffer;
}
373
374double
375PyOS_ascii_atof(const char *nptr)
376{
377 return PyOS_ascii_strtod(nptr, NULL);
378}