| Martin v. Löwis | 737ea82 | 2004-06-08 18:52:54 +0000 | [diff] [blame] | 1 | /* -*- Mode: C; c-file-style: "python" -*- */ | 
 | 2 |  | 
 | 3 | #include <Python.h> | 
 | 4 | #include <locale.h> | 
 | 5 |  | 
 | 6 | /* ascii character tests (as opposed to locale tests) */ | 
 | 7 | #define ISSPACE(c)  ((c) == ' ' || (c) == '\f' || (c) == '\n' || \ | 
| Antoine Pitrou | c7c96a9 | 2010-05-09 15:15:40 +0000 | [diff] [blame] | 8 |              (c) == '\r' || (c) == '\t' || (c) == '\v') | 
| Martin v. Löwis | 737ea82 | 2004-06-08 18:52:54 +0000 | [diff] [blame] | 9 | #define ISDIGIT(c)  ((c) >= '0' && (c) <= '9') | 
| Martin v. Löwis | 737ea82 | 2004-06-08 18:52:54 +0000 | [diff] [blame] | 10 |  | 
 | 11 |  | 
 | 12 | /** | 
 | 13 |  * PyOS_ascii_strtod: | 
 | 14 |  * @nptr:    the string to convert to a numeric value. | 
 | 15 |  * @endptr:  if non-%NULL, it returns the character after | 
 | 16 |  *           the last character used in the conversion. | 
| Antoine Pitrou | c7c96a9 | 2010-05-09 15:15:40 +0000 | [diff] [blame] | 17 |  * | 
| Martin v. Löwis | 737ea82 | 2004-06-08 18:52:54 +0000 | [diff] [blame] | 18 |  * Converts a string to a #gdouble value. | 
 | 19 |  * This function behaves like the standard strtod() function | 
 | 20 |  * does in the C locale. It does this without actually | 
 | 21 |  * changing the current locale, since that would not be | 
 | 22 |  * thread-safe. | 
 | 23 |  * | 
 | 24 |  * This function is typically used when reading configuration | 
 | 25 |  * files or other non-user input that should be locale independent. | 
 | 26 |  * To handle input from the user you should normally use the | 
 | 27 |  * locale-sensitive system strtod() function. | 
 | 28 |  * | 
 | 29 |  * If the correct value would cause overflow, plus or minus %HUGE_VAL | 
 | 30 |  * is returned (according to the sign of the value), and %ERANGE is | 
 | 31 |  * stored in %errno. If the correct value would cause underflow, | 
 | 32 |  * zero is returned and %ERANGE is stored in %errno. | 
| Georg Brandl | b569ee4 | 2006-05-29 14:28:05 +0000 | [diff] [blame] | 33 |  * If memory allocation fails, %ENOMEM is stored in %errno. | 
| Antoine Pitrou | c7c96a9 | 2010-05-09 15:15:40 +0000 | [diff] [blame] | 34 |  * | 
| Martin v. Löwis | 737ea82 | 2004-06-08 18:52:54 +0000 | [diff] [blame] | 35 |  * This function resets %errno before calling strtod() so that | 
 | 36 |  * you can reliably detect overflow and underflow. | 
 | 37 |  * | 
 | 38 |  * Return value: the #gdouble value. | 
 | 39 |  **/ | 
 | 40 | double | 
| Neal Norwitz | e7214a1 | 2005-12-18 05:03:17 +0000 | [diff] [blame] | 41 | PyOS_ascii_strtod(const char *nptr, char **endptr) | 
| Martin v. Löwis | 737ea82 | 2004-06-08 18:52:54 +0000 | [diff] [blame] | 42 | { | 
| Antoine Pitrou | c7c96a9 | 2010-05-09 15:15:40 +0000 | [diff] [blame] | 43 |     char *fail_pos; | 
 | 44 |     double val = -1.0; | 
 | 45 |     struct lconv *locale_data; | 
 | 46 |     const char *decimal_point; | 
 | 47 |     size_t decimal_point_len; | 
 | 48 |     const char *p, *decimal_point_pos; | 
 | 49 |     const char *end = NULL; /* Silence gcc */ | 
 | 50 |     const char *digits_pos = NULL; | 
 | 51 |     int negate = 0; | 
| Martin v. Löwis | 737ea82 | 2004-06-08 18:52:54 +0000 | [diff] [blame] | 52 |  | 
| Antoine Pitrou | c7c96a9 | 2010-05-09 15:15:40 +0000 | [diff] [blame] | 53 |     assert(nptr != NULL); | 
| Martin v. Löwis | 737ea82 | 2004-06-08 18:52:54 +0000 | [diff] [blame] | 54 |  | 
| Antoine Pitrou | c7c96a9 | 2010-05-09 15:15:40 +0000 | [diff] [blame] | 55 |     fail_pos = NULL; | 
| Martin v. Löwis | 737ea82 | 2004-06-08 18:52:54 +0000 | [diff] [blame] | 56 |  | 
| Antoine Pitrou | c7c96a9 | 2010-05-09 15:15:40 +0000 | [diff] [blame] | 57 |     locale_data = localeconv(); | 
 | 58 |     decimal_point = locale_data->decimal_point; | 
 | 59 |     decimal_point_len = strlen(decimal_point); | 
| Martin v. Löwis | 737ea82 | 2004-06-08 18:52:54 +0000 | [diff] [blame] | 60 |  | 
| Antoine Pitrou | c7c96a9 | 2010-05-09 15:15:40 +0000 | [diff] [blame] | 61 |     assert(decimal_point_len != 0); | 
| Martin v. Löwis | 737ea82 | 2004-06-08 18:52:54 +0000 | [diff] [blame] | 62 |  | 
| Antoine Pitrou | c7c96a9 | 2010-05-09 15:15:40 +0000 | [diff] [blame] | 63 |     decimal_point_pos = NULL; | 
| Guido van Rossum | 3b83549 | 2008-01-05 00:59:59 +0000 | [diff] [blame] | 64 |  | 
| Antoine Pitrou | c7c96a9 | 2010-05-09 15:15:40 +0000 | [diff] [blame] | 65 |     /* We process any leading whitespace and the optional sign manually, | 
 | 66 |        then pass the remainder to the system strtod.  This ensures that | 
 | 67 |        the result of an underflow has the correct sign. (bug #1725)  */ | 
| Guido van Rossum | 3b83549 | 2008-01-05 00:59:59 +0000 | [diff] [blame] | 68 |  | 
| Antoine Pitrou | c7c96a9 | 2010-05-09 15:15:40 +0000 | [diff] [blame] | 69 |     p = nptr; | 
 | 70 |     /* Skip leading space */ | 
 | 71 |     while (ISSPACE(*p)) | 
 | 72 |         p++; | 
| Guido van Rossum | 3b83549 | 2008-01-05 00:59:59 +0000 | [diff] [blame] | 73 |  | 
| Antoine Pitrou | c7c96a9 | 2010-05-09 15:15:40 +0000 | [diff] [blame] | 74 |     /* Process leading sign, if present */ | 
 | 75 |     if (*p == '-') { | 
 | 76 |         negate = 1; | 
 | 77 |         p++; | 
 | 78 |     } else if (*p == '+') { | 
 | 79 |         p++; | 
 | 80 |     } | 
| Guido van Rossum | 3b83549 | 2008-01-05 00:59:59 +0000 | [diff] [blame] | 81 |  | 
| Antoine Pitrou | c7c96a9 | 2010-05-09 15:15:40 +0000 | [diff] [blame] | 82 |     /* What's left should begin with a digit, a decimal point, or one of | 
 | 83 |        the letters i, I, n, N. It should not begin with 0x or 0X */ | 
 | 84 |     if ((!ISDIGIT(*p) && | 
 | 85 |          *p != '.' && *p != 'i' && *p != 'I' && *p != 'n' && *p != 'N') | 
 | 86 |         || | 
 | 87 |         (*p == '0' && (p[1] == 'x' || p[1] == 'X'))) | 
 | 88 |     { | 
 | 89 |         if (endptr) | 
 | 90 |             *endptr = (char*)nptr; | 
 | 91 |         errno = EINVAL; | 
 | 92 |         return val; | 
 | 93 |     } | 
 | 94 |     digits_pos = p; | 
| Guido van Rossum | 3b83549 | 2008-01-05 00:59:59 +0000 | [diff] [blame] | 95 |  | 
| Antoine Pitrou | c7c96a9 | 2010-05-09 15:15:40 +0000 | [diff] [blame] | 96 |     if (decimal_point[0] != '.' || | 
 | 97 |         decimal_point[1] != 0) | 
 | 98 |     { | 
 | 99 |         while (ISDIGIT(*p)) | 
 | 100 |             p++; | 
| Neal Norwitz | e7214a1 | 2005-12-18 05:03:17 +0000 | [diff] [blame] | 101 |  | 
| Antoine Pitrou | c7c96a9 | 2010-05-09 15:15:40 +0000 | [diff] [blame] | 102 |         if (*p == '.') | 
 | 103 |         { | 
 | 104 |             decimal_point_pos = p++; | 
| Martin v. Löwis | 737ea82 | 2004-06-08 18:52:54 +0000 | [diff] [blame] | 105 |  | 
| Antoine Pitrou | c7c96a9 | 2010-05-09 15:15:40 +0000 | [diff] [blame] | 106 |             while (ISDIGIT(*p)) | 
 | 107 |                 p++; | 
| Martin v. Löwis | 737ea82 | 2004-06-08 18:52:54 +0000 | [diff] [blame] | 108 |  | 
| Antoine Pitrou | c7c96a9 | 2010-05-09 15:15:40 +0000 | [diff] [blame] | 109 |             if (*p == 'e' || *p == 'E') | 
 | 110 |                 p++; | 
 | 111 |             if (*p == '+' || *p == '-') | 
 | 112 |                 p++; | 
 | 113 |             while (ISDIGIT(*p)) | 
 | 114 |                 p++; | 
 | 115 |             end = p; | 
 | 116 |         } | 
 | 117 |         else if (strncmp(p, decimal_point, decimal_point_len) == 0) | 
 | 118 |         { | 
 | 119 |             /* Python bug #1417699 */ | 
 | 120 |             if (endptr) | 
 | 121 |                 *endptr = (char*)nptr; | 
 | 122 |             errno = EINVAL; | 
 | 123 |             return val; | 
 | 124 |         } | 
 | 125 |         /* For the other cases, we need not convert the decimal | 
 | 126 |            point */ | 
 | 127 |     } | 
| Martin v. Löwis | 737ea82 | 2004-06-08 18:52:54 +0000 | [diff] [blame] | 128 |  | 
| Antoine Pitrou | c7c96a9 | 2010-05-09 15:15:40 +0000 | [diff] [blame] | 129 |     /* Set errno to zero, so that we can distinguish zero results | 
 | 130 |        and underflows */ | 
 | 131 |     errno = 0; | 
| Martin v. Löwis | 737ea82 | 2004-06-08 18:52:54 +0000 | [diff] [blame] | 132 |  | 
| Antoine Pitrou | c7c96a9 | 2010-05-09 15:15:40 +0000 | [diff] [blame] | 133 |     if (decimal_point_pos) | 
 | 134 |     { | 
 | 135 |         char *copy, *c; | 
| Martin v. Löwis | 737ea82 | 2004-06-08 18:52:54 +0000 | [diff] [blame] | 136 |  | 
| Antoine Pitrou | c7c96a9 | 2010-05-09 15:15:40 +0000 | [diff] [blame] | 137 |         /* We need to convert the '.' to the locale specific decimal | 
 | 138 |            point */ | 
 | 139 |         copy = (char *)PyMem_MALLOC(end - digits_pos + | 
 | 140 |                                     1 + decimal_point_len); | 
 | 141 |         if (copy == NULL) { | 
 | 142 |             if (endptr) | 
 | 143 |                 *endptr = (char *)nptr; | 
 | 144 |             errno = ENOMEM; | 
 | 145 |             return val; | 
 | 146 |         } | 
| Martin v. Löwis | 737ea82 | 2004-06-08 18:52:54 +0000 | [diff] [blame] | 147 |  | 
| Antoine Pitrou | c7c96a9 | 2010-05-09 15:15:40 +0000 | [diff] [blame] | 148 |         c = copy; | 
 | 149 |         memcpy(c, digits_pos, decimal_point_pos - digits_pos); | 
 | 150 |         c += decimal_point_pos - digits_pos; | 
 | 151 |         memcpy(c, decimal_point, decimal_point_len); | 
 | 152 |         c += decimal_point_len; | 
 | 153 |         memcpy(c, decimal_point_pos + 1, | 
 | 154 |                end - (decimal_point_pos + 1)); | 
 | 155 |         c += end - (decimal_point_pos + 1); | 
 | 156 |         *c = 0; | 
| Martin v. Löwis | 737ea82 | 2004-06-08 18:52:54 +0000 | [diff] [blame] | 157 |  | 
| Antoine Pitrou | c7c96a9 | 2010-05-09 15:15:40 +0000 | [diff] [blame] | 158 |         val = strtod(copy, &fail_pos); | 
| Martin v. Löwis | 737ea82 | 2004-06-08 18:52:54 +0000 | [diff] [blame] | 159 |  | 
| Antoine Pitrou | c7c96a9 | 2010-05-09 15:15:40 +0000 | [diff] [blame] | 160 |         if (fail_pos) | 
 | 161 |         { | 
 | 162 |             if (fail_pos > decimal_point_pos) | 
 | 163 |                 fail_pos = (char *)digits_pos + | 
 | 164 |                     (fail_pos - copy) - | 
 | 165 |                     (decimal_point_len - 1); | 
 | 166 |             else | 
 | 167 |                 fail_pos = (char *)digits_pos + | 
 | 168 |                     (fail_pos - copy); | 
 | 169 |         } | 
| Martin v. Löwis | 737ea82 | 2004-06-08 18:52:54 +0000 | [diff] [blame] | 170 |  | 
| Antoine Pitrou | c7c96a9 | 2010-05-09 15:15:40 +0000 | [diff] [blame] | 171 |         PyMem_FREE(copy); | 
| Martin v. Löwis | 737ea82 | 2004-06-08 18:52:54 +0000 | [diff] [blame] | 172 |  | 
| Antoine Pitrou | c7c96a9 | 2010-05-09 15:15:40 +0000 | [diff] [blame] | 173 |     } | 
 | 174 |     else { | 
 | 175 |         val = strtod(digits_pos, &fail_pos); | 
 | 176 |     } | 
| Martin v. Löwis | 737ea82 | 2004-06-08 18:52:54 +0000 | [diff] [blame] | 177 |  | 
| Antoine Pitrou | c7c96a9 | 2010-05-09 15:15:40 +0000 | [diff] [blame] | 178 |     if (fail_pos == digits_pos) | 
 | 179 |         fail_pos = (char *)nptr; | 
| Guido van Rossum | 3b83549 | 2008-01-05 00:59:59 +0000 | [diff] [blame] | 180 |  | 
| Antoine Pitrou | c7c96a9 | 2010-05-09 15:15:40 +0000 | [diff] [blame] | 181 |     if (negate && fail_pos != nptr) | 
 | 182 |         val = -val; | 
| Guido van Rossum | 3b83549 | 2008-01-05 00:59:59 +0000 | [diff] [blame] | 183 |  | 
| Antoine Pitrou | c7c96a9 | 2010-05-09 15:15:40 +0000 | [diff] [blame] | 184 |     if (endptr) | 
 | 185 |         *endptr = fail_pos; | 
| Martin v. Löwis | 737ea82 | 2004-06-08 18:52:54 +0000 | [diff] [blame] | 186 |  | 
| Antoine Pitrou | c7c96a9 | 2010-05-09 15:15:40 +0000 | [diff] [blame] | 187 |     return val; | 
| Martin v. Löwis | 737ea82 | 2004-06-08 18:52:54 +0000 | [diff] [blame] | 188 | } | 
 | 189 |  | 
| Eric Smith | 0a95063 | 2008-04-30 01:09:30 +0000 | [diff] [blame] | 190 | /* Given a string that may have a decimal point in the current | 
 | 191 |    locale, change it back to a dot.  Since the string cannot get | 
 | 192 |    longer, no need for a maximum buffer size parameter. */ | 
 | 193 | Py_LOCAL_INLINE(void) | 
 | 194 | change_decimal_from_locale_to_dot(char* buffer) | 
 | 195 | { | 
| Antoine Pitrou | c7c96a9 | 2010-05-09 15:15:40 +0000 | [diff] [blame] | 196 |     struct lconv *locale_data = localeconv(); | 
 | 197 |     const char *decimal_point = locale_data->decimal_point; | 
| Eric Smith | 0a95063 | 2008-04-30 01:09:30 +0000 | [diff] [blame] | 198 |  | 
| Antoine Pitrou | c7c96a9 | 2010-05-09 15:15:40 +0000 | [diff] [blame] | 199 |     if (decimal_point[0] != '.' || decimal_point[1] != 0) { | 
 | 200 |         size_t decimal_point_len = strlen(decimal_point); | 
| Eric Smith | 0a95063 | 2008-04-30 01:09:30 +0000 | [diff] [blame] | 201 |  | 
| Antoine Pitrou | c7c96a9 | 2010-05-09 15:15:40 +0000 | [diff] [blame] | 202 |         if (*buffer == '+' || *buffer == '-') | 
 | 203 |             buffer++; | 
 | 204 |         while (isdigit(Py_CHARMASK(*buffer))) | 
 | 205 |             buffer++; | 
 | 206 |         if (strncmp(buffer, decimal_point, decimal_point_len) == 0) { | 
 | 207 |             *buffer = '.'; | 
 | 208 |             buffer++; | 
 | 209 |             if (decimal_point_len > 1) { | 
 | 210 |                 /* buffer needs to get smaller */ | 
 | 211 |                 size_t rest_len = strlen(buffer + | 
 | 212 |                                      (decimal_point_len - 1)); | 
 | 213 |                 memmove(buffer, | 
 | 214 |                     buffer + (decimal_point_len - 1), | 
 | 215 |                     rest_len); | 
 | 216 |                 buffer[rest_len] = 0; | 
 | 217 |             } | 
 | 218 |         } | 
 | 219 |     } | 
| Eric Smith | 0a95063 | 2008-04-30 01:09:30 +0000 | [diff] [blame] | 220 | } | 
 | 221 |  | 
| Martin v. Löwis | 737ea82 | 2004-06-08 18:52:54 +0000 | [diff] [blame] | 222 |  | 
| Eric Smith | 7ef40bf | 2008-02-20 23:34:22 +0000 | [diff] [blame] | 223 | /* From the C99 standard, section 7.19.6: | 
 | 224 | The exponent always contains at least two digits, and only as many more digits | 
 | 225 | as necessary to represent the exponent. | 
 | 226 | */ | 
 | 227 | #define MIN_EXPONENT_DIGITS 2 | 
 | 228 |  | 
| Eric Smith | 0a95063 | 2008-04-30 01:09:30 +0000 | [diff] [blame] | 229 | /* Ensure that any exponent, if present, is at least MIN_EXPONENT_DIGITS | 
 | 230 |    in length. */ | 
 | 231 | Py_LOCAL_INLINE(void) | 
| Mark Dickinson | 9a0517c | 2009-04-26 19:59:00 +0000 | [diff] [blame] | 232 | ensure_minimum_exponent_length(char* buffer, size_t buf_size) | 
| Eric Smith | 0a95063 | 2008-04-30 01:09:30 +0000 | [diff] [blame] | 233 | { | 
| Antoine Pitrou | c7c96a9 | 2010-05-09 15:15:40 +0000 | [diff] [blame] | 234 |     char *p = strpbrk(buffer, "eE"); | 
 | 235 |     if (p && (*(p + 1) == '-' || *(p + 1) == '+')) { | 
 | 236 |         char *start = p + 2; | 
 | 237 |         int exponent_digit_cnt = 0; | 
 | 238 |         int leading_zero_cnt = 0; | 
 | 239 |         int in_leading_zeros = 1; | 
 | 240 |         int significant_digit_cnt; | 
| Eric Smith | 0a95063 | 2008-04-30 01:09:30 +0000 | [diff] [blame] | 241 |  | 
| Antoine Pitrou | c7c96a9 | 2010-05-09 15:15:40 +0000 | [diff] [blame] | 242 |         /* Skip over the exponent and the sign. */ | 
 | 243 |         p += 2; | 
| Eric Smith | 0a95063 | 2008-04-30 01:09:30 +0000 | [diff] [blame] | 244 |  | 
| Antoine Pitrou | c7c96a9 | 2010-05-09 15:15:40 +0000 | [diff] [blame] | 245 |         /* Find the end of the exponent, keeping track of leading | 
 | 246 |            zeros. */ | 
 | 247 |         while (*p && isdigit(Py_CHARMASK(*p))) { | 
 | 248 |             if (in_leading_zeros && *p == '0') | 
 | 249 |                 ++leading_zero_cnt; | 
 | 250 |             if (*p != '0') | 
 | 251 |                 in_leading_zeros = 0; | 
 | 252 |             ++p; | 
 | 253 |             ++exponent_digit_cnt; | 
 | 254 |         } | 
| Eric Smith | 0a95063 | 2008-04-30 01:09:30 +0000 | [diff] [blame] | 255 |  | 
| Antoine Pitrou | c7c96a9 | 2010-05-09 15:15:40 +0000 | [diff] [blame] | 256 |         significant_digit_cnt = exponent_digit_cnt - leading_zero_cnt; | 
 | 257 |         if (exponent_digit_cnt == MIN_EXPONENT_DIGITS) { | 
 | 258 |             /* If there are 2 exactly digits, we're done, | 
 | 259 |                regardless of what they contain */ | 
 | 260 |         } | 
 | 261 |         else if (exponent_digit_cnt > MIN_EXPONENT_DIGITS) { | 
 | 262 |             int extra_zeros_cnt; | 
| Eric Smith | 0a95063 | 2008-04-30 01:09:30 +0000 | [diff] [blame] | 263 |  | 
| Antoine Pitrou | c7c96a9 | 2010-05-09 15:15:40 +0000 | [diff] [blame] | 264 |             /* There are more than 2 digits in the exponent.  See | 
 | 265 |                if we can delete some of the leading zeros */ | 
 | 266 |             if (significant_digit_cnt < MIN_EXPONENT_DIGITS) | 
 | 267 |                 significant_digit_cnt = MIN_EXPONENT_DIGITS; | 
 | 268 |             extra_zeros_cnt = exponent_digit_cnt - | 
 | 269 |                 significant_digit_cnt; | 
| Eric Smith | 0a95063 | 2008-04-30 01:09:30 +0000 | [diff] [blame] | 270 |  | 
| Antoine Pitrou | c7c96a9 | 2010-05-09 15:15:40 +0000 | [diff] [blame] | 271 |             /* Delete extra_zeros_cnt worth of characters from the | 
 | 272 |                front of the exponent */ | 
 | 273 |             assert(extra_zeros_cnt >= 0); | 
| Eric Smith | 0a95063 | 2008-04-30 01:09:30 +0000 | [diff] [blame] | 274 |  | 
| Antoine Pitrou | c7c96a9 | 2010-05-09 15:15:40 +0000 | [diff] [blame] | 275 |             /* Add one to significant_digit_cnt to copy the | 
 | 276 |                trailing 0 byte, thus setting the length */ | 
 | 277 |             memmove(start, | 
 | 278 |                 start + extra_zeros_cnt, | 
 | 279 |                 significant_digit_cnt + 1); | 
 | 280 |         } | 
 | 281 |         else { | 
 | 282 |             /* If there are fewer than 2 digits, add zeros | 
 | 283 |                until there are 2, if there's enough room */ | 
 | 284 |             int zeros = MIN_EXPONENT_DIGITS - exponent_digit_cnt; | 
 | 285 |             if (start + zeros + exponent_digit_cnt + 1 | 
 | 286 |                   < buffer + buf_size) { | 
 | 287 |                 memmove(start + zeros, start, | 
 | 288 |                     exponent_digit_cnt + 1); | 
 | 289 |                 memset(start, '0', zeros); | 
 | 290 |             } | 
 | 291 |         } | 
 | 292 |     } | 
| Eric Smith | 0a95063 | 2008-04-30 01:09:30 +0000 | [diff] [blame] | 293 | } | 
 | 294 |  | 
 | 295 | /* Ensure that buffer has a decimal point in it.  The decimal point | 
 | 296 |    will not be in the current locale, it will always be '.' */ | 
 | 297 | Py_LOCAL_INLINE(void) | 
 | 298 | ensure_decimal_point(char* buffer, size_t buf_size) | 
 | 299 | { | 
| Antoine Pitrou | c7c96a9 | 2010-05-09 15:15:40 +0000 | [diff] [blame] | 300 |     int insert_count = 0; | 
 | 301 |     char* chars_to_insert; | 
| Eric Smith | 0a95063 | 2008-04-30 01:09:30 +0000 | [diff] [blame] | 302 |  | 
| Antoine Pitrou | c7c96a9 | 2010-05-09 15:15:40 +0000 | [diff] [blame] | 303 |     /* search for the first non-digit character */ | 
 | 304 |     char *p = buffer; | 
 | 305 |     if (*p == '-' || *p == '+') | 
 | 306 |         /* Skip leading sign, if present.  I think this could only | 
 | 307 |            ever be '-', but it can't hurt to check for both. */ | 
 | 308 |         ++p; | 
 | 309 |     while (*p && isdigit(Py_CHARMASK(*p))) | 
 | 310 |         ++p; | 
| Eric Smith | 0a95063 | 2008-04-30 01:09:30 +0000 | [diff] [blame] | 311 |  | 
| Antoine Pitrou | c7c96a9 | 2010-05-09 15:15:40 +0000 | [diff] [blame] | 312 |     if (*p == '.') { | 
 | 313 |         if (isdigit(Py_CHARMASK(*(p+1)))) { | 
 | 314 |             /* Nothing to do, we already have a decimal | 
 | 315 |                point and a digit after it */ | 
 | 316 |         } | 
 | 317 |         else { | 
 | 318 |             /* We have a decimal point, but no following | 
 | 319 |                digit.  Insert a zero after the decimal. */ | 
 | 320 |             ++p; | 
 | 321 |             chars_to_insert = "0"; | 
 | 322 |             insert_count = 1; | 
 | 323 |         } | 
 | 324 |     } | 
 | 325 |     else { | 
 | 326 |         chars_to_insert = ".0"; | 
 | 327 |         insert_count = 2; | 
 | 328 |     } | 
 | 329 |     if (insert_count) { | 
 | 330 |         size_t buf_len = strlen(buffer); | 
 | 331 |         if (buf_len + insert_count + 1 >= buf_size) { | 
 | 332 |             /* If there is not enough room in the buffer | 
 | 333 |                for the additional text, just skip it.  It's | 
 | 334 |                not worth generating an error over. */ | 
 | 335 |         } | 
 | 336 |         else { | 
 | 337 |             memmove(p + insert_count, p, | 
 | 338 |                 buffer + strlen(buffer) - p + 1); | 
 | 339 |             memcpy(p, chars_to_insert, insert_count); | 
 | 340 |         } | 
 | 341 |     } | 
| Eric Smith | 0a95063 | 2008-04-30 01:09:30 +0000 | [diff] [blame] | 342 | } | 
 | 343 |  | 
 | 344 | /* Add the locale specific grouping characters to buffer.  Note | 
 | 345 |    that any decimal point (if it's present) in buffer is already | 
 | 346 |    locale-specific.  Return 0 on error, else 1. */ | 
 | 347 | Py_LOCAL_INLINE(int) | 
 | 348 | add_thousands_grouping(char* buffer, size_t buf_size) | 
 | 349 | { | 
| Antoine Pitrou | c7c96a9 | 2010-05-09 15:15:40 +0000 | [diff] [blame] | 350 |     Py_ssize_t len = strlen(buffer); | 
 | 351 |     struct lconv *locale_data = localeconv(); | 
 | 352 |     const char *decimal_point = locale_data->decimal_point; | 
| Eric Smith | 0a95063 | 2008-04-30 01:09:30 +0000 | [diff] [blame] | 353 |  | 
| Antoine Pitrou | c7c96a9 | 2010-05-09 15:15:40 +0000 | [diff] [blame] | 354 |     /* Find the decimal point, if any.  We're only concerned | 
 | 355 |        about the characters to the left of the decimal when | 
 | 356 |        adding grouping. */ | 
 | 357 |     char *p = strstr(buffer, decimal_point); | 
 | 358 |     if (!p) { | 
 | 359 |         /* No decimal, use the entire string. */ | 
| Eric Smith | 0a95063 | 2008-04-30 01:09:30 +0000 | [diff] [blame] | 360 |  | 
| Antoine Pitrou | c7c96a9 | 2010-05-09 15:15:40 +0000 | [diff] [blame] | 361 |         /* If any exponent, adjust p. */ | 
 | 362 |         p = strpbrk(buffer, "eE"); | 
 | 363 |         if (!p) | 
 | 364 |             /* No exponent and no decimal.  Use the entire | 
 | 365 |                string. */ | 
 | 366 |             p = buffer + len; | 
 | 367 |     } | 
 | 368 |     /* At this point, p points just past the right-most character we | 
 | 369 |        want to format.  We need to add the grouping string for the | 
 | 370 |        characters between buffer and p. */ | 
 | 371 |     return _PyString_InsertThousandsGrouping(buffer, len, p-buffer, | 
 | 372 |                                              buf_size, NULL, 1); | 
| Eric Smith | 0a95063 | 2008-04-30 01:09:30 +0000 | [diff] [blame] | 373 | } | 
 | 374 |  | 
/* see FORMATBUFLEN in unicodeobject.c */
#define FLOAT_FORMATBUFLEN 120

/**
 * PyOS_ascii_formatd:
 * @buffer: A buffer to place the resulting string in
 * @buf_size: The length of the buffer.
 * @format: The printf()-style format to use for the
 *          code to use for converting.
 * @d: The double to convert
 *
 * Converts a double to a string, using the '.' as
 * decimal point. To format the number you pass in
 * a printf()-style format string. Allowed conversion
 * specifiers are 'e', 'E', 'f', 'F', 'g', 'G', 'n' and 'Z'.
 *
 * 'n' is the same as 'g', except it uses the current locale.
 * 'Z' is the same as 'g', except it always has a decimal and
 *     at least one digit after the decimal.
 *
 * Return value: The pointer to the buffer with the converted string,
 * or %NULL if @format is rejected (it is empty, does not start with
 * '%', contains a single quote, 'l' or '%' after the first character,
 * does not end in an allowed conversion specifier, or is too long to
 * copy for 'n'/'Z') or if adding the thousands grouping for 'n' fails.
 **/
char *
PyOS_ascii_formatd(char       *buffer,
                   size_t      buf_size,
                   const char *format,
                   double      d)
{
    char format_char;
    size_t format_len = strlen(format);

    /* For type 'n', we need to make a copy of the format string, because
       we're going to modify 'n' -> 'g', and format is const char*, so we
       can't modify it directly.  FLOAT_FORMATBUFLEN should be longer than
       we ever need this to be.  There's an upcoming check to ensure it's
       big enough. */
    /* Issue 2264: code 'Z' requires copying the format.  'Z' is 'g', but
       also with at least one character past the decimal. */
    char tmp_format[FLOAT_FORMATBUFLEN];

    /* The format must start with '%'.  This is tested before anything
       else because it also rejects the empty string, for which
       format_len - 1 below would wrap around and index far outside
       the string. */
    if (format[0] != '%')
        return NULL;

    /* The last character in the format string must be the format char */
    format_char = format[format_len - 1];

    /* I'm not sure why this test is here.  It's ensuring that the format
       string after the first character doesn't have a single quote, a
       lowercase l, or a percent. */
    if (strpbrk(format + 1, "'l%"))
        return NULL;

    /* Also curious about this function is that it accepts format strings
       like "%xg", which are invalid for floats.  In general, the
       interface to this function is not very good, but changing it is
       difficult because it's a public API. */

    if (!(format_char == 'e' || format_char == 'E' ||
          format_char == 'f' || format_char == 'F' ||
          format_char == 'g' || format_char == 'G' ||
          format_char == 'n' || format_char == 'Z'))
        return NULL;

    /* Map 'n' or 'Z' format_char to 'g', by copying the format string and
       replacing the final char with a 'g' */
    if (format_char == 'n' || format_char == 'Z') {
        if (format_len + 1 >= sizeof(tmp_format)) {
            /* The format won't fit in our copy.  Error out.  In
               practice, this will never happen and will be
               detected by returning NULL */
            return NULL;
        }
        strcpy(tmp_format, format);
        tmp_format[format_len - 1] = 'g';
        format = tmp_format;
    }


    /* Have PyOS_snprintf do the hard work */
    PyOS_snprintf(buffer, buf_size, format, d);

    /* Do various fixups on the return string */

    /* Get the current locale, and find the decimal point string.
       Convert that string back to a dot.  Do not do this if using the
       'n' (number) format code, since we want to keep the localized
       decimal point in that case. */
    if (format_char != 'n')
        change_decimal_from_locale_to_dot(buffer);

    /* If an exponent exists, ensure that the exponent is at least
       MIN_EXPONENT_DIGITS digits, providing the buffer is large enough
       for the extra zeros.  Also, if there are more than
       MIN_EXPONENT_DIGITS, remove as many zeros as possible until we get
       back to MIN_EXPONENT_DIGITS */
    ensure_minimum_exponent_length(buffer, buf_size);

    /* If format_char is 'Z', make sure we have at least one character
       after the decimal point (and make sure we have a decimal point). */
    if (format_char == 'Z')
        ensure_decimal_point(buffer, buf_size);

    /* If format_char is 'n', add the thousands grouping. */
    if (format_char == 'n')
        if (!add_thousands_grouping(buffer, buf_size))
            return NULL;

    return buffer;
}
 | 485 |  | 
 | 486 | double | 
 | 487 | PyOS_ascii_atof(const char *nptr) | 
 | 488 | { | 
| Antoine Pitrou | c7c96a9 | 2010-05-09 15:15:40 +0000 | [diff] [blame] | 489 |     return PyOS_ascii_strtod(nptr, NULL); | 
| Martin v. Löwis | 737ea82 | 2004-06-08 18:52:54 +0000 | [diff] [blame] | 490 | } |