Blame - Python/pystrtod.c - platform/external/python/cpython3

blob: 16efa9d3ab8c2987d56a0c6436316ab3ee121e09 [file] [log] [blame]

Martin v. Löwis	737ea82	2004-06-08 18:52:54 +0000	[diff] [blame]	1	/* -*- Mode: C; c-file-style: "python" -*- */
				2
				3	#include <Python.h>
				4	#include <locale.h>
				5
				6	/* ascii character tests (as opposed to locale tests) */
				7	#define ISSPACE(c) ((c) == ' ' \|\| (c) == '\f' \|\| (c) == '\n' \|\| \
				8	(c) == '\r' \|\| (c) == '\t' \|\| (c) == '\v')
				9	#define ISDIGIT(c) ((c) >= '0' && (c) <= '9')
				10	#define ISXDIGIT(c) (ISDIGIT(c) \|\| ((c) >= 'a' && (c) <= 'f') \|\| ((c) >= 'A' && (c) <= 'F'))
				11
				12
				13	/**
				14	* PyOS_ascii_strtod:
				15	* @nptr: the string to convert to a numeric value.
				16	* @endptr: if non-%NULL, it returns the character after
				17	* the last character used in the conversion.
				18	*
				19	* Converts a string to a #gdouble value.
				20	* This function behaves like the standard strtod() function
				21	* does in the C locale. It does this without actually
				22	* changing the current locale, since that would not be
				23	* thread-safe.
				24	*
				25	* This function is typically used when reading configuration
				26	* files or other non-user input that should be locale independent.
				27	* To handle input from the user you should normally use the
				28	* locale-sensitive system strtod() function.
				29	*
				30	* If the correct value would cause overflow, plus or minus %HUGE_VAL
				31	* is returned (according to the sign of the value), and %ERANGE is
				32	* stored in %errno. If the correct value would cause underflow,
				33	* zero is returned and %ERANGE is stored in %errno.
Thomas Wouters	4d70c3d	2006-06-08 14:42:34 +0000	[diff] [blame]	34	* If memory allocation fails, %ENOMEM is stored in %errno.
Martin v. Löwis	737ea82	2004-06-08 18:52:54 +0000	[diff] [blame]	35	*
				36	* This function resets %errno before calling strtod() so that
				37	* you can reliably detect overflow and underflow.
				38	*
				39	* Return value: the #gdouble value.
				40	**/
				41	double
Neal Norwitz	e7214a1	2005-12-18 05:03:17 +0000	[diff] [blame]	42	PyOS_ascii_strtod(const char nptr, char *endptr)
Martin v. Löwis	737ea82	2004-06-08 18:52:54 +0000	[diff] [blame]	43	{
				44	char *fail_pos;
Neal Norwitz	0e7a0ed	2005-12-18 05:37:36 +0000	[diff] [blame]	45	double val = -1.0;
Martin v. Löwis	737ea82	2004-06-08 18:52:54 +0000	[diff] [blame]	46	struct lconv *locale_data;
				47	const char *decimal_point;
Neal Norwitz	d39d861	2006-01-08 01:03:36 +0000	[diff] [blame]	48	size_t decimal_point_len;
Martin v. Löwis	737ea82	2004-06-08 18:52:54 +0000	[diff] [blame]	49	const char p, decimal_point_pos;
				50	const char end = NULL; / Silence gcc */
Christian Heimes	faf2f63	2008-01-06 16:59:19 +0000	[diff] [blame]	51	const char *digits_pos = NULL;
				52	int negate = 0;
Martin v. Löwis	737ea82	2004-06-08 18:52:54 +0000	[diff] [blame]	53
Martin v. Löwis	737ea82	2004-06-08 18:52:54 +0000	[diff] [blame]	54	assert(nptr != NULL);
				55
				56	fail_pos = NULL;
				57
				58	locale_data = localeconv();
				59	decimal_point = locale_data->decimal_point;
				60	decimal_point_len = strlen(decimal_point);
				61
				62	assert(decimal_point_len != 0);
				63
				64	decimal_point_pos = NULL;
Christian Heimes	faf2f63	2008-01-06 16:59:19 +0000	[diff] [blame]	65
				66	/* We process any leading whitespace and the optional sign manually,
				67	then pass the remainder to the system strtod. This ensures that
				68	the result of an underflow has the correct sign. (bug #1725) */
				69
				70	p = nptr;
				71	/* Skip leading space */
				72	while (ISSPACE(*p))
				73	p++;
				74
				75	/* Process leading sign, if present */
				76	if (*p == '-') {
				77	negate = 1;
				78	p++;
				79	} else if (*p == '+') {
				80	p++;
				81	}
				82
				83	/* What's left should begin with a digit, a decimal point, or one of
				84	the letters i, I, n, N. It should not begin with 0x or 0X */
				85	if ((!ISDIGIT(*p) &&
				86	p != '.' && p != 'i' && p != 'I' && p != 'n' && *p != 'N')
				87	\|\|
				88	(*p == '0' && (p[1] == 'x' \|\| p[1] == 'X')))
				89	{
				90	if (endptr)
				91	endptr = (char)nptr;
				92	errno = EINVAL;
				93	return val;
				94	}
				95	digits_pos = p;
				96
Martin v. Löwis	737ea82	2004-06-08 18:52:54 +0000	[diff] [blame]	97	if (decimal_point[0] != '.' \|\|
				98	decimal_point[1] != 0)
				99	{
Neal Norwitz	e7214a1	2005-12-18 05:03:17 +0000	[diff] [blame]	100	while (ISDIGIT(*p))
				101	p++;
				102
				103	if (*p == '.')
Martin v. Löwis	737ea82	2004-06-08 18:52:54 +0000	[diff] [blame]	104	{
Neal Norwitz	e7214a1	2005-12-18 05:03:17 +0000	[diff] [blame]	105	decimal_point_pos = p++;
Martin v. Löwis	737ea82	2004-06-08 18:52:54 +0000	[diff] [blame]	106
Martin v. Löwis	737ea82	2004-06-08 18:52:54 +0000	[diff] [blame]	107	while (ISDIGIT(*p))
				108	p++;
				109
Neal Norwitz	e7214a1	2005-12-18 05:03:17 +0000	[diff] [blame]	110	if (p == 'e' \|\| p == 'E')
				111	p++;
				112	if (p == '+' \|\| p == '-')
				113	p++;
				114	while (ISDIGIT(*p))
				115	p++;
				116	end = p;
Martin v. Löwis	737ea82	2004-06-08 18:52:54 +0000	[diff] [blame]	117	}
Thomas Wouters	0e3f591	2006-08-11 14:57:12 +0000	[diff] [blame]	118	else if (strncmp(p, decimal_point, decimal_point_len) == 0)
				119	{
				120	/* Python bug #1417699 */
Christian Heimes	faf2f63	2008-01-06 16:59:19 +0000	[diff] [blame]	121	if (endptr)
				122	endptr = (char)nptr;
Thomas Wouters	0e3f591	2006-08-11 14:57:12 +0000	[diff] [blame]	123	errno = EINVAL;
				124	return val;
				125	}
Neal Norwitz	e7214a1	2005-12-18 05:03:17 +0000	[diff] [blame]	126	/* For the other cases, we need not convert the decimal point */
Martin v. Löwis	737ea82	2004-06-08 18:52:54 +0000	[diff] [blame]	127	}
				128
Neal Norwitz	e7214a1	2005-12-18 05:03:17 +0000	[diff] [blame]	129	/* Set errno to zero, so that we can distinguish zero results
				130	and underflows */
Martin v. Löwis	737ea82	2004-06-08 18:52:54 +0000	[diff] [blame]	131	errno = 0;
				132
				133	if (decimal_point_pos)
				134	{
				135	char copy, c;
				136
Neal Norwitz	e7214a1	2005-12-18 05:03:17 +0000	[diff] [blame]	137	/* We need to convert the '.' to the locale specific decimal point */
Christian Heimes	faf2f63	2008-01-06 16:59:19 +0000	[diff] [blame]	138	copy = (char *)PyMem_MALLOC(end - digits_pos +
				139	1 + decimal_point_len);
Thomas Wouters	4d70c3d	2006-06-08 14:42:34 +0000	[diff] [blame]	140	if (copy == NULL) {
				141	if (endptr)
				142	endptr = (char )nptr;
				143	errno = ENOMEM;
				144	return val;
				145	}
Martin v. Löwis	737ea82	2004-06-08 18:52:54 +0000	[diff] [blame]	146
				147	c = copy;
Christian Heimes	faf2f63	2008-01-06 16:59:19 +0000	[diff] [blame]	148	memcpy(c, digits_pos, decimal_point_pos - digits_pos);
				149	c += decimal_point_pos - digits_pos;
Martin v. Löwis	737ea82	2004-06-08 18:52:54 +0000	[diff] [blame]	150	memcpy(c, decimal_point, decimal_point_len);
				151	c += decimal_point_len;
				152	memcpy(c, decimal_point_pos + 1, end - (decimal_point_pos + 1));
				153	c += end - (decimal_point_pos + 1);
				154	*c = 0;
				155
				156	val = strtod(copy, &fail_pos);
				157
				158	if (fail_pos)
				159	{
				160	if (fail_pos > decimal_point_pos)
Christian Heimes	faf2f63	2008-01-06 16:59:19 +0000	[diff] [blame]	161	fail_pos = (char *)digits_pos +
				162	(fail_pos - copy) -
				163	(decimal_point_len - 1);
Martin v. Löwis	737ea82	2004-06-08 18:52:54 +0000	[diff] [blame]	164	else
Christian Heimes	faf2f63	2008-01-06 16:59:19 +0000	[diff] [blame]	165	fail_pos = (char *)digits_pos +
				166	(fail_pos - copy);
Martin v. Löwis	737ea82	2004-06-08 18:52:54 +0000	[diff] [blame]	167	}
				168
Thomas Wouters	477c8d5	2006-05-27 19:21:47 +0000	[diff] [blame]	169	PyMem_FREE(copy);
Martin v. Löwis	737ea82	2004-06-08 18:52:54 +0000	[diff] [blame]	170
				171	}
Neal Norwitz	e7214a1	2005-12-18 05:03:17 +0000	[diff] [blame]	172	else {
Christian Heimes	faf2f63	2008-01-06 16:59:19 +0000	[diff] [blame]	173	val = strtod(digits_pos, &fail_pos);
Neal Norwitz	e7214a1	2005-12-18 05:03:17 +0000	[diff] [blame]	174	}
Martin v. Löwis	737ea82	2004-06-08 18:52:54 +0000	[diff] [blame]	175
Christian Heimes	faf2f63	2008-01-06 16:59:19 +0000	[diff] [blame]	176	if (fail_pos == digits_pos)
				177	fail_pos = (char *)nptr;
				178
				179	if (negate && fail_pos != nptr)
				180	val = -val;
				181
Martin v. Löwis	737ea82	2004-06-08 18:52:54 +0000	[diff] [blame]	182	if (endptr)
				183	*endptr = fail_pos;
				184
				185	return val;
				186	}
				187
				188
/* From the C99 standard, section 7.19.6:
   The exponent always contains at least two digits, and only as many more
   digits as necessary to represent the exponent.
*/
#define MIN_EXPONENT_DIGITS 2

/* Size of the temporary format-string buffer; see FORMATBUFLEN in
   unicodeobject.c */
#define FLOAT_FORMATBUFLEN 120
				197
Martin v. Löwis	737ea82	2004-06-08 18:52:54 +0000	[diff] [blame]	198	/**
				199	* PyOS_ascii_formatd:
				200	* @buffer: A buffer to place the resulting string in
				201	* @buf_len: The length of the buffer.
				202	* @format: The printf()-style format to use for the
				203	* code to use for converting.
				204	* @d: The #gdouble to convert
				205	*
				206	* Converts a #gdouble to a string, using the '.' as
				207	* decimal point. To format the number you pass in
				208	* a printf()-style format string. Allowed conversion
Christian Heimes	c3f30c4	2008-02-22 16:37:40 +0000	[diff] [blame^]	209	* specifiers are 'e', 'E', 'f', 'F', 'g', 'G', and 'n'.
Martin v. Löwis	737ea82	2004-06-08 18:52:54 +0000	[diff] [blame]	210	*
Christian Heimes	c3f30c4	2008-02-22 16:37:40 +0000	[diff] [blame^]	211	* 'n' is the same as 'g', except it uses the current locale.
				212	*
Martin v. Löwis	737ea82	2004-06-08 18:52:54 +0000	[diff] [blame]	213	* Return value: The pointer to the buffer with the converted string.
				214	**/
				215	char *
				216	PyOS_ascii_formatd(char *buffer,
Martin v. Löwis	18e1655	2006-02-15 17:27:45 +0000	[diff] [blame]	217	size_t buf_len,
Martin v. Löwis	737ea82	2004-06-08 18:52:54 +0000	[diff] [blame]	218	const char *format,
				219	double d)
				220	{
Martin v. Löwis	737ea82	2004-06-08 18:52:54 +0000	[diff] [blame]	221	char *p;
Martin v. Löwis	737ea82	2004-06-08 18:52:54 +0000	[diff] [blame]	222	char format_char;
Christian Heimes	c3f30c4	2008-02-22 16:37:40 +0000	[diff] [blame^]	223	size_t format_len = strlen(format);
				224
				225	/* For type 'n', we need to make a copy of the format string, because
				226	we're going to modify 'n' -> 'g', and format is const char*, so we
				227	can't modify it directly. FLOAT_FORMATBUFLEN should be longer than
				228	we ever need this to be. There's an upcoming check to ensure it's
				229	big enough. */
				230	char tmp_format[FLOAT_FORMATBUFLEN];
Martin v. Löwis	737ea82	2004-06-08 18:52:54 +0000	[diff] [blame]	231
				232	/* g_return_val_if_fail (buffer != NULL, NULL); */
				233	/* g_return_val_if_fail (format[0] == '%', NULL); */
				234	/* g_return_val_if_fail (strpbrk (format + 1, "'l%") == NULL, NULL); */
				235
Christian Heimes	c3f30c4	2008-02-22 16:37:40 +0000	[diff] [blame^]	236	/* The last character in the format string must be the format char */
				237	format_char = format[format_len - 1];
Martin v. Löwis	737ea82	2004-06-08 18:52:54 +0000	[diff] [blame]	238
				239	/* g_return_val_if_fail (format_char == 'e' \|\| format_char == 'E' \|\| */
				240	/* format_char == 'f' \|\| format_char == 'F' \|\| */
				241	/* format_char == 'g' \|\| format_char == 'G', */
				242	/* NULL); */
				243
				244	if (format[0] != '%')
				245	return NULL;
				246
Christian Heimes	c3f30c4	2008-02-22 16:37:40 +0000	[diff] [blame^]	247	/* I'm not sure why this test is here. It's ensuring that the format
				248	string after the first character doesn't have a single quote, a
				249	lowercase l, or a percent. This is the reverse of the commented-out
				250	test about 10 lines ago. */
Martin v. Löwis	737ea82	2004-06-08 18:52:54 +0000	[diff] [blame]	251	if (strpbrk(format + 1, "'l%"))
				252	return NULL;
				253
				254	if (!(format_char == 'e' \|\| format_char == 'E' \|\|
				255	format_char == 'f' \|\| format_char == 'F' \|\|
Christian Heimes	c3f30c4	2008-02-22 16:37:40 +0000	[diff] [blame^]	256	format_char == 'g' \|\| format_char == 'G' \|\|
				257	format_char == 'n'))
Martin v. Löwis	737ea82	2004-06-08 18:52:54 +0000	[diff] [blame]	258	return NULL;
				259
Christian Heimes	c3f30c4	2008-02-22 16:37:40 +0000	[diff] [blame^]	260	/* Map 'n' format_char to 'g', by copying the format string and
				261	replacing the final 'n' with a 'g' */
				262	if (format_char == 'n') {
				263	if (format_len + 1 >= sizeof(tmp_format)) {
				264	/* The format won't fit in our copy. Error out. In
				265	practice, this will never happen and will be detected
				266	by returning NULL */
				267	return NULL;
				268	}
				269	strcpy(tmp_format, format);
				270	tmp_format[format_len - 1] = 'g';
				271	format = tmp_format;
				272	}
Martin v. Löwis	737ea82	2004-06-08 18:52:54 +0000	[diff] [blame]	273
Christian Heimes	c3f30c4	2008-02-22 16:37:40 +0000	[diff] [blame^]	274	/* Have PyOS_snprintf do the hard work */
Martin v. Löwis	737ea82	2004-06-08 18:52:54 +0000	[diff] [blame]	275	PyOS_snprintf(buffer, buf_len, format, d);
				276
Christian Heimes	c3f30c4	2008-02-22 16:37:40 +0000	[diff] [blame^]	277	/* Get the current local, and find the decimal point character (or
				278	string?). Convert that string back to a dot. Do not do this if
				279	using the 'n' (number) format code. */
				280	if (format_char != 'n') {
				281	struct lconv *locale_data = localeconv();
				282	const char *decimal_point = locale_data->decimal_point;
				283	size_t decimal_point_len = strlen(decimal_point);
				284	size_t rest_len;
Martin v. Löwis	737ea82	2004-06-08 18:52:54 +0000	[diff] [blame]	285
Christian Heimes	c3f30c4	2008-02-22 16:37:40 +0000	[diff] [blame^]	286	assert(decimal_point_len != 0);
Martin v. Löwis	737ea82	2004-06-08 18:52:54 +0000	[diff] [blame]	287
Christian Heimes	c3f30c4	2008-02-22 16:37:40 +0000	[diff] [blame^]	288	if (decimal_point[0] != '.' \|\| decimal_point[1] != 0) {
				289	p = buffer;
Martin v. Löwis	737ea82	2004-06-08 18:52:54 +0000	[diff] [blame]	290
Christian Heimes	c3f30c4	2008-02-22 16:37:40 +0000	[diff] [blame^]	291	if (p == '+' \|\| p == '-')
				292	p++;
Martin v. Löwis	737ea82	2004-06-08 18:52:54 +0000	[diff] [blame]	293
Christian Heimes	c3f30c4	2008-02-22 16:37:40 +0000	[diff] [blame^]	294	while (isdigit(Py_CHARMASK(*p)))
				295	p++;
Martin v. Löwis	737ea82	2004-06-08 18:52:54 +0000	[diff] [blame]	296
Christian Heimes	c3f30c4	2008-02-22 16:37:40 +0000	[diff] [blame^]	297	if (strncmp(p, decimal_point, decimal_point_len) == 0) {
				298	*p = '.';
				299	p++;
				300	if (decimal_point_len > 1) {
				301	rest_len = strlen(p +
				302	(decimal_point_len - 1));
				303	memmove(p, p + (decimal_point_len - 1),
				304	rest_len);
				305	p[rest_len] = 0;
				306	}
				307	}
				308	}
				309	}
				310
				311	/* If an exponent exists, ensure that the exponent is at least
				312	MIN_EXPONENT_DIGITS digits, providing the buffer is large enough
				313	for the extra zeros. Also, if there are more than
				314	MIN_EXPONENT_DIGITS, remove as many zeros as possible until we get
				315	back to MIN_EXPONENT_DIGITS */
				316	p = strpbrk(buffer, "eE");
				317	if (p && ((p + 1) == '-' \|\| (p + 1) == '+')) {
				318	char *start = p + 2;
				319	int exponent_digit_cnt = 0;
				320	int leading_zero_cnt = 0;
				321	int in_leading_zeros = 1;
				322	int significant_digit_cnt;
				323
				324	p += 2;
				325	while (p && isdigit(Py_CHARMASK(p))) {
				326	if (in_leading_zeros && *p == '0')
				327	++leading_zero_cnt;
				328	if (*p != '0')
				329	in_leading_zeros = 0;
				330	++p;
				331	++exponent_digit_cnt;
				332	}
				333
				334	significant_digit_cnt = exponent_digit_cnt - leading_zero_cnt;
				335	if (exponent_digit_cnt == MIN_EXPONENT_DIGITS) {
				336	/* If there are 2 exactly digits, we're done,
				337	regardless of what they contain */
				338	}
				339	else if (exponent_digit_cnt > MIN_EXPONENT_DIGITS) {
				340	int extra_zeros_cnt;
				341
				342	/* There are more than 2 digits in the exponent. See
				343	if we can delete some of the leading zeros */
				344	if (significant_digit_cnt < MIN_EXPONENT_DIGITS)
				345	significant_digit_cnt = MIN_EXPONENT_DIGITS;
				346	extra_zeros_cnt = exponent_digit_cnt - significant_digit_cnt;
				347
				348	/* Delete extra_zeros_cnt worth of characters from the
				349	front of the exponent */
				350	assert(extra_zeros_cnt >= 0);
				351
				352	/* Add one to significant_digit_cnt to copy the
				353	trailing 0 byte, thus setting the length */
				354	memmove(start,
				355	start + extra_zeros_cnt,
				356	significant_digit_cnt + 1);
				357	}
				358	else {
				359	/* If there are fewer than 2 digits, add zeros
				360	until there are 2, if there's enough room */
				361	int zeros = MIN_EXPONENT_DIGITS - exponent_digit_cnt;
				362	if (start + zeros + exponent_digit_cnt + 1
				363	< buffer + buf_len) {
				364	memmove(start + zeros, start,
				365	exponent_digit_cnt + 1);
				366	memset(start, '0', zeros);
Martin v. Löwis	737ea82	2004-06-08 18:52:54 +0000	[diff] [blame]	367	}
				368	}
				369	}
				370
				371	return buffer;
				372	}
				373
				374	double
				375	PyOS_ascii_atof(const char *nptr)
				376	{
				377	return PyOS_ascii_strtod(nptr, NULL);
				378	}