Blame - Objects/stringlib/formatter.h - platform/external/python/cpython2

blob: 2bb2ed2ca894a3ba5073b8283e13cb06d0d82ebf [file] [log] [blame]

Eric Smith	a9f7d62	2008-02-17 19:46:49 +0000	[diff] [blame]	1	/* implements the string, long, and float formatters. that is,
				2	string.__format__, etc. */
				3
				4	/* Before including this, you must include either:
				5	stringlib/unicodedefs.h
				6	stringlib/stringdefs.h
				7
				8	Also, you should define the names:
				9	FORMAT_STRING
				10	FORMAT_LONG
				11	FORMAT_FLOAT
				12	to be whatever you want the public names of these functions to
				13	be. These are the only non-static functions defined here.
				14	*/
				15
				16	#define ALLOW_PARENS_FOR_SIGN 0
				17
				18	/*
				19	get_integer consumes 0 or more decimal digit characters from an
				20	input string, updates *result with the corresponding positive
				21	integer, and returns the number of digits consumed.
				22
				23	returns -1 on error.
				24	*/
				25	static int
				26	get_integer(STRINGLIB_CHAR *ptr, STRINGLIB_CHAR end,
				27	Py_ssize_t *result)
				28	{
				29	Py_ssize_t accumulator, digitval, oldaccumulator;
				30	int numdigits;
				31	accumulator = numdigits = 0;
				32	for (;;(*ptr)++, numdigits++) {
				33	if (*ptr >= end)
				34	break;
				35	digitval = STRINGLIB_TODECIMAL(**ptr);
				36	if (digitval < 0)
				37	break;
				38	/*
				39	This trick was copied from old Unicode format code. It's cute,
				40	but would really suck on an old machine with a slow divide
				41	implementation. Fortunately, in the normal case we do not
				42	expect too many digits.
				43	*/
				44	oldaccumulator = accumulator;
				45	accumulator *= 10;
				46	if ((accumulator+10)/10 != oldaccumulator+1) {
				47	PyErr_Format(PyExc_ValueError,
				48	"Too many decimal digits in format string");
				49	return -1;
				50	}
				51	accumulator += digitval;
				52	}
				53	*result = accumulator;
				54	return numdigits;
				55	}
				56
				57	/************************************************************************/
				58	/********* standard format specifier parsing ************************/
				59	/************************************************************************/
				60
				61	/* returns true if this character is a specifier alignment token */
				62	Py_LOCAL_INLINE(int)
				63	is_alignment_token(STRINGLIB_CHAR c)
				64	{
				65	switch (c) {
				66	case '<': case '>': case '=': case '^':
				67	return 1;
				68	default:
				69	return 0;
				70	}
				71	}
				72
				73	/* returns true if this character is a sign element */
				74	Py_LOCAL_INLINE(int)
				75	is_sign_element(STRINGLIB_CHAR c)
				76	{
				77	switch (c) {
				78	case ' ': case '+': case '-':
				79	#if ALLOW_PARENS_FOR_SIGN
				80	case '(':
				81	#endif
				82	return 1;
				83	default:
				84	return 0;
				85	}
				86	}
				87
				88
				89	typedef struct {
				90	STRINGLIB_CHAR fill_char;
				91	STRINGLIB_CHAR align;
				92	STRINGLIB_CHAR sign;
				93	Py_ssize_t width;
				94	Py_ssize_t precision;
				95	STRINGLIB_CHAR type;
				96	} InternalFormatSpec;
				97
				98	/*
				99	ptr points to the start of the format_spec, end points just past its end.
				100	fills in format with the parsed information.
				101	returns 1 on success, 0 on failure.
				102	if failure, sets the exception
				103	*/
				104	static int
Eric Smith	dc13b79	2008-05-30 18:10:04 +0000	[diff] [blame]	105	parse_internal_render_format_spec(STRINGLIB_CHAR *format_spec,
				106	Py_ssize_t format_spec_len,
Eric Smith	a9f7d62	2008-02-17 19:46:49 +0000	[diff] [blame]	107	InternalFormatSpec *format,
				108	char default_type)
				109	{
Eric Smith	dc13b79	2008-05-30 18:10:04 +0000	[diff] [blame]	110	STRINGLIB_CHAR *ptr = format_spec;
				111	STRINGLIB_CHAR *end = format_spec + format_spec_len;
Eric Smith	a9f7d62	2008-02-17 19:46:49 +0000	[diff] [blame]	112
				113	/* end-ptr is used throughout this code to specify the length of
				114	the input string */
				115
				116	Py_ssize_t specified_width;
				117
				118	format->fill_char = '\0';
				119	format->align = '\0';
				120	format->sign = '\0';
				121	format->width = -1;
				122	format->precision = -1;
				123	format->type = default_type;
				124
				125	/* If the second char is an alignment token,
				126	then parse the fill char */
				127	if (end-ptr >= 2 && is_alignment_token(ptr[1])) {
				128	format->align = ptr[1];
				129	format->fill_char = ptr[0];
				130	ptr += 2;
				131	}
				132	else if (end-ptr >= 1 && is_alignment_token(ptr[0])) {
				133	format->align = ptr[0];
Eric Smith	8a803dd	2008-02-20 23:39:28 +0000	[diff] [blame]	134	++ptr;
Eric Smith	a9f7d62	2008-02-17 19:46:49 +0000	[diff] [blame]	135	}
				136
				137	/* Parse the various sign options */
				138	if (end-ptr >= 1 && is_sign_element(ptr[0])) {
				139	format->sign = ptr[0];
Eric Smith	8a803dd	2008-02-20 23:39:28 +0000	[diff] [blame]	140	++ptr;
Eric Smith	a9f7d62	2008-02-17 19:46:49 +0000	[diff] [blame]	141	#if ALLOW_PARENS_FOR_SIGN
				142	if (end-ptr >= 1 && ptr[0] == ')') {
Eric Smith	8a803dd	2008-02-20 23:39:28 +0000	[diff] [blame]	143	++ptr;
Eric Smith	a9f7d62	2008-02-17 19:46:49 +0000	[diff] [blame]	144	}
				145	#endif
				146	}
				147
				148	/* The special case for 0-padding (backwards compat) */
				149	if (format->fill_char == '\0' && end-ptr >= 1 && ptr[0] == '0') {
				150	format->fill_char = '0';
				151	if (format->align == '\0') {
				152	format->align = '=';
				153	}
Eric Smith	8a803dd	2008-02-20 23:39:28 +0000	[diff] [blame]	154	++ptr;
Eric Smith	a9f7d62	2008-02-17 19:46:49 +0000	[diff] [blame]	155	}
				156
				157	/* XXX add error checking */
				158	specified_width = get_integer(&ptr, end, &format->width);
				159
				160	/* if specified_width is 0, we didn't consume any characters for
				161	the width. in that case, reset the width to -1, because
				162	get_integer() will have set it to zero */
				163	if (specified_width == 0) {
				164	format->width = -1;
				165	}
				166
				167	/* Parse field precision */
				168	if (end-ptr && ptr[0] == '.') {
Eric Smith	8a803dd	2008-02-20 23:39:28 +0000	[diff] [blame]	169	++ptr;
Eric Smith	a9f7d62	2008-02-17 19:46:49 +0000	[diff] [blame]	170
				171	/* XXX add error checking */
				172	specified_width = get_integer(&ptr, end, &format->precision);
				173
				174	/* not having a precision after a dot is an error */
				175	if (specified_width == 0) {
				176	PyErr_Format(PyExc_ValueError,
				177	"Format specifier missing precision");
				178	return 0;
				179	}
				180
				181	}
				182
				183	/* Finally, parse the type field */
				184
				185	if (end-ptr > 1) {
				186	/* invalid conversion spec */
				187	PyErr_Format(PyExc_ValueError, "Invalid conversion specification");
				188	return 0;
				189	}
				190
				191	if (end-ptr == 1) {
				192	format->type = ptr[0];
Eric Smith	8a803dd	2008-02-20 23:39:28 +0000	[diff] [blame]	193	++ptr;
Eric Smith	a9f7d62	2008-02-17 19:46:49 +0000	[diff] [blame]	194	}
				195
				196	return 1;
				197	}
				198
				199	#if defined FORMAT_FLOAT \|\| defined FORMAT_LONG
				200	/************************************************************************/
				201	/********* common routines for numeric formatting *******************/
				202	/************************************************************************/
				203
				204	/* describes the layout for an integer, see the comment in
				205	_calc_integer_widths() for details */
				206	typedef struct {
				207	Py_ssize_t n_lpadding;
				208	Py_ssize_t n_spadding;
				209	Py_ssize_t n_rpadding;
				210	char lsign;
				211	Py_ssize_t n_lsign;
				212	char rsign;
				213	Py_ssize_t n_rsign;
				214	Py_ssize_t n_total; /* just a convenience, it's derivable from the
				215	other fields */
				216	} NumberFieldWidths;
				217
				218	/* not all fields of format are used. for example, precision is
				219	unused. should this take discrete params in order to be more clear
				220	about what it does? or is passing a single format parameter easier
				221	and more efficient enough to justify a little obfuscation? */
				222	static void
				223	calc_number_widths(NumberFieldWidths *r, STRINGLIB_CHAR actual_sign,
				224	Py_ssize_t n_digits, const InternalFormatSpec *format)
				225	{
				226	r->n_lpadding = 0;
				227	r->n_spadding = 0;
				228	r->n_rpadding = 0;
				229	r->lsign = '\0';
				230	r->n_lsign = 0;
				231	r->rsign = '\0';
				232	r->n_rsign = 0;
				233
				234	/* the output will look like:
				235	\| \|
				236	\| <lpadding> <lsign> <spadding> <digits> <rsign> <rpadding> \|
				237	\| \|
				238
				239	lsign and rsign are computed from format->sign and the actual
				240	sign of the number
				241
				242	digits is already known
				243
				244	the total width is either given, or computed from the
				245	actual digits
				246
				247	only one of lpadding, spadding, and rpadding can be non-zero,
				248	and it's calculated from the width and other fields
				249	*/
				250
				251	/* compute the various parts we're going to write */
				252	if (format->sign == '+') {
				253	/* always put a + or - */
				254	r->n_lsign = 1;
				255	r->lsign = (actual_sign == '-' ? '-' : '+');
				256	}
				257	#if ALLOW_PARENS_FOR_SIGN
				258	else if (format->sign == '(') {
				259	if (actual_sign == '-') {
				260	r->n_lsign = 1;
				261	r->lsign = '(';
				262	r->n_rsign = 1;
				263	r->rsign = ')';
				264	}
				265	}
				266	#endif
				267	else if (format->sign == ' ') {
				268	r->n_lsign = 1;
				269	r->lsign = (actual_sign == '-' ? '-' : ' ');
				270	}
				271	else {
				272	/* non specified, or the default (-) */
				273	if (actual_sign == '-') {
				274	r->n_lsign = 1;
				275	r->lsign = '-';
				276	}
				277	}
				278
				279	/* now the number of padding characters */
				280	if (format->width == -1) {
				281	/* no padding at all, nothing to do */
				282	}
				283	else {
				284	/* see if any padding is needed */
				285	if (r->n_lsign + n_digits + r->n_rsign >= format->width) {
				286	/* no padding needed, we're already bigger than the
				287	requested width */
				288	}
				289	else {
				290	/* determine which of left, space, or right padding is
				291	needed */
				292	Py_ssize_t padding = format->width -
				293	(r->n_lsign + n_digits + r->n_rsign);
				294	if (format->align == '<')
				295	r->n_rpadding = padding;
				296	else if (format->align == '>')
				297	r->n_lpadding = padding;
				298	else if (format->align == '^') {
				299	r->n_lpadding = padding / 2;
				300	r->n_rpadding = padding - r->n_lpadding;
				301	}
				302	else if (format->align == '=')
				303	r->n_spadding = padding;
				304	else
				305	r->n_lpadding = padding;
				306	}
				307	}
				308	r->n_total = r->n_lpadding + r->n_lsign + r->n_spadding +
				309	n_digits + r->n_rsign + r->n_rpadding;
				310	}
				311
				312	/* fill in the non-digit parts of a numbers's string representation,
				313	as determined in _calc_integer_widths(). returns the pointer to
				314	where the digits go. */
				315	static STRINGLIB_CHAR *
				316	fill_number(STRINGLIB_CHAR p_buf, const NumberFieldWidths spec,
				317	Py_ssize_t n_digits, STRINGLIB_CHAR fill_char)
				318	{
				319	STRINGLIB_CHAR* p_digits;
				320
				321	if (spec->n_lpadding) {
				322	STRINGLIB_FILL(p_buf, fill_char, spec->n_lpadding);
				323	p_buf += spec->n_lpadding;
				324	}
				325	if (spec->n_lsign == 1) {
				326	*p_buf++ = spec->lsign;
				327	}
				328	if (spec->n_spadding) {
				329	STRINGLIB_FILL(p_buf, fill_char, spec->n_spadding);
				330	p_buf += spec->n_spadding;
				331	}
				332	p_digits = p_buf;
				333	p_buf += n_digits;
				334	if (spec->n_rsign == 1) {
				335	*p_buf++ = spec->rsign;
				336	}
				337	if (spec->n_rpadding) {
				338	STRINGLIB_FILL(p_buf, fill_char, spec->n_rpadding);
				339	p_buf += spec->n_rpadding;
				340	}
				341	return p_digits;
				342	}
				343	#endif /* FORMAT_FLOAT \|\| FORMAT_LONG */
				344
				345	/************************************************************************/
				346	/********* string formatting ****************************************/
				347	/************************************************************************/
				348
				349	static PyObject *
				350	format_string_internal(PyObject value, const InternalFormatSpec format)
				351	{
				352	Py_ssize_t width; /* total field width */
				353	Py_ssize_t lpad;
				354	STRINGLIB_CHAR *dst;
				355	STRINGLIB_CHAR *src = STRINGLIB_STR(value);
				356	Py_ssize_t len = STRINGLIB_LEN(value);
				357	PyObject *result = NULL;
				358
				359	/* sign is not allowed on strings */
				360	if (format->sign != '\0') {
				361	PyErr_SetString(PyExc_ValueError,
				362	"Sign not allowed in string format specifier");
				363	goto done;
				364	}
				365
				366	/* '=' alignment not allowed on strings */
				367	if (format->align == '=') {
				368	PyErr_SetString(PyExc_ValueError,
				369	"'=' alignment not allowed "
				370	"in string format specifier");
				371	goto done;
				372	}
				373
				374	/* if precision is specified, output no more that format.precision
				375	characters */
				376	if (format->precision >= 0 && len >= format->precision) {
				377	len = format->precision;
				378	}
				379
				380	if (format->width >= 0) {
				381	width = format->width;
				382
				383	/* but use at least len characters */
				384	if (len > width) {
				385	width = len;
				386	}
				387	}
				388	else {
				389	/* not specified, use all of the chars and no more */
				390	width = len;
				391	}
				392
				393	/* allocate the resulting string */
				394	result = STRINGLIB_NEW(NULL, width);
				395	if (result == NULL)
				396	goto done;
				397
				398	/* now write into that space */
				399	dst = STRINGLIB_STR(result);
				400
				401	/* figure out how much leading space we need, based on the
				402	aligning */
				403	if (format->align == '>')
				404	lpad = width - len;
				405	else if (format->align == '^')
				406	lpad = (width - len) / 2;
				407	else
				408	lpad = 0;
				409
				410	/* if right aligning, increment the destination allow space on the
				411	left */
				412	memcpy(dst + lpad, src, len * sizeof(STRINGLIB_CHAR));
				413
				414	/* do any padding */
				415	if (width > len) {
				416	STRINGLIB_CHAR fill_char = format->fill_char;
				417	if (fill_char == '\0') {
				418	/* use the default, if not specified */
				419	fill_char = ' ';
				420	}
				421
				422	/* pad on left */
				423	if (lpad)
				424	STRINGLIB_FILL(dst, fill_char, lpad);
				425
				426	/* pad on right */
				427	if (width - len - lpad)
				428	STRINGLIB_FILL(dst + len + lpad, fill_char, width - len - lpad);
				429	}
				430
				431	done:
				432	return result;
				433	}
				434
				435
				436	/************************************************************************/
				437	/********* long formatting ******************************************/
				438	/************************************************************************/
				439
				440	#if defined FORMAT_LONG \|\| defined FORMAT_INT
				441	typedef PyObject*
				442	(IntOrLongToString)(PyObject value, int base);
				443
				444	static PyObject *
				445	format_int_or_long_internal(PyObject value, const InternalFormatSpec format,
				446	IntOrLongToString tostring)
				447	{
				448	PyObject *result = NULL;
				449	PyObject *tmp = NULL;
				450	STRINGLIB_CHAR *pnumeric_chars;
				451	STRINGLIB_CHAR numeric_char;
				452	STRINGLIB_CHAR sign = '\0';
				453	STRINGLIB_CHAR *p;
				454	Py_ssize_t n_digits; /* count of digits need from the computed
				455	string */
				456	Py_ssize_t n_leading_chars;
Eric Smith	cf537ff	2008-05-11 19:52:48 +0000	[diff] [blame]	457	Py_ssize_t n_grouping_chars = 0; /* Count of additional chars to
				458	allocate, used for 'n'
				459	formatting. */
Eric Smith	a9f7d62	2008-02-17 19:46:49 +0000	[diff] [blame]	460	NumberFieldWidths spec;
				461	long x;
				462
				463	/* no precision allowed on integers */
				464	if (format->precision != -1) {
				465	PyErr_SetString(PyExc_ValueError,
				466	"Precision not allowed in integer format specifier");
				467	goto done;
				468	}
				469
				470
				471	/* special case for character formatting */
				472	if (format->type == 'c') {
				473	/* error to specify a sign */
				474	if (format->sign != '\0') {
				475	PyErr_SetString(PyExc_ValueError,
				476	"Sign not allowed with integer"
				477	" format specifier 'c'");
				478	goto done;
				479	}
				480
				481	/* taken from unicodeobject.c formatchar() */
				482	/* Integer input truncated to a character */
				483	/* XXX: won't work for int */
				484	x = PyLong_AsLong(value);
				485	if (x == -1 && PyErr_Occurred())
				486	goto done;
				487	#ifdef Py_UNICODE_WIDE
				488	if (x < 0 \|\| x > 0x10ffff) {
				489	PyErr_SetString(PyExc_OverflowError,
				490	"%c arg not in range(0x110000) "
				491	"(wide Python build)");
				492	goto done;
				493	}
				494	#else
				495	if (x < 0 \|\| x > 0xffff) {
				496	PyErr_SetString(PyExc_OverflowError,
				497	"%c arg not in range(0x10000) "
				498	"(narrow Python build)");
				499	goto done;
				500	}
				501	#endif
				502	numeric_char = (STRINGLIB_CHAR)x;
				503	pnumeric_chars = &numeric_char;
				504	n_digits = 1;
				505	}
				506	else {
				507	int base;
				508	int leading_chars_to_skip; /* Number of characters added by
				509	PyNumber_ToBase that we want to
				510	skip over. */
				511
				512	/* Compute the base and how many characters will be added by
				513	PyNumber_ToBase */
				514	switch (format->type) {
				515	case 'b':
				516	base = 2;
				517	leading_chars_to_skip = 2; /* 0b */
				518	break;
				519	case 'o':
				520	base = 8;
				521	leading_chars_to_skip = 2; /* 0o */
				522	break;
				523	case 'x':
				524	case 'X':
				525	base = 16;
				526	leading_chars_to_skip = 2; /* 0x */
				527	break;
				528	default: /* shouldn't be needed, but stops a compiler warning */
				529	case 'd':
Eric Smith	cf537ff	2008-05-11 19:52:48 +0000	[diff] [blame]	530	case 'n':
Eric Smith	a9f7d62	2008-02-17 19:46:49 +0000	[diff] [blame]	531	base = 10;
				532	leading_chars_to_skip = 0;
				533	break;
				534	}
				535
				536	/* Do the hard part, converting to a string in a given base */
				537	tmp = tostring(value, base);
				538	if (tmp == NULL)
				539	goto done;
				540
				541	pnumeric_chars = STRINGLIB_STR(tmp);
				542	n_digits = STRINGLIB_LEN(tmp);
				543
				544	/* Remember not to modify what pnumeric_chars points to. it
				545	might be interned. Only modify it after we copy it into a
				546	newly allocated output buffer. */
				547
				548	/* Is a sign character present in the output? If so, remember it
				549	and skip it */
				550	sign = pnumeric_chars[0];
				551	if (sign == '-') {
				552	++leading_chars_to_skip;
				553	}
				554
				555	/* Skip over the leading chars (0x, 0b, etc.) */
				556	n_digits -= leading_chars_to_skip;
				557	pnumeric_chars += leading_chars_to_skip;
				558	}
				559
				560	/* Calculate the widths of the various leading and trailing parts */
				561	calc_number_widths(&spec, sign, n_digits, format);
				562
Eric Smith	cf537ff	2008-05-11 19:52:48 +0000	[diff] [blame]	563	if (format->type == 'n')
				564	/* Compute how many additional chars we need to allocate
				565	to hold the thousands grouping. */
				566	STRINGLIB_GROUPING(pnumeric_chars, n_digits,
				567	pnumeric_chars+n_digits,
				568	0, &n_grouping_chars, 0);
				569
Eric Smith	a9f7d62	2008-02-17 19:46:49 +0000	[diff] [blame]	570	/* Allocate a new string to hold the result */
Eric Smith	cf537ff	2008-05-11 19:52:48 +0000	[diff] [blame]	571	result = STRINGLIB_NEW(NULL, spec.n_total + n_grouping_chars);
Eric Smith	a9f7d62	2008-02-17 19:46:49 +0000	[diff] [blame]	572	if (!result)
				573	goto done;
				574	p = STRINGLIB_STR(result);
				575
				576	/* Fill in the digit parts */
				577	n_leading_chars = spec.n_lpadding + spec.n_lsign + spec.n_spadding;
				578	memmove(p + n_leading_chars,
				579	pnumeric_chars,
				580	n_digits * sizeof(STRINGLIB_CHAR));
				581
Eric Smith	cf537ff	2008-05-11 19:52:48 +0000	[diff] [blame]	582	/* If type is 'X', convert to uppercase */
Eric Smith	a9f7d62	2008-02-17 19:46:49 +0000	[diff] [blame]	583	if (format->type == 'X') {
				584	Py_ssize_t t;
Eric Smith	8a803dd	2008-02-20 23:39:28 +0000	[diff] [blame]	585	for (t = 0; t < n_digits; ++t)
Eric Smith	a9f7d62	2008-02-17 19:46:49 +0000	[diff] [blame]	586	p[t + n_leading_chars] = STRINGLIB_TOUPPER(p[t + n_leading_chars]);
				587	}
				588
Eric Smith	cf537ff	2008-05-11 19:52:48 +0000	[diff] [blame]	589	/* Insert the grouping, if any, after the uppercasing of 'X', so we can
				590	ensure that grouping chars won't be affeted. */
				591	if (n_grouping_chars && format->type == 'n') {
				592	/* We know this can't fail, since we've already
				593	reserved enough space. */
				594	STRINGLIB_CHAR *pstart = p + n_leading_chars;
				595	int r = STRINGLIB_GROUPING(pstart, n_digits,
				596	pstart + n_digits,
				597	spec.n_total+n_grouping_chars-n_leading_chars,
				598	NULL, 0);
				599	assert(r);
				600	}
				601
Eric Smith	a9f7d62	2008-02-17 19:46:49 +0000	[diff] [blame]	602	/* Fill in the non-digit parts */
				603	fill_number(p, &spec, n_digits,
				604	format->fill_char == '\0' ? ' ' : format->fill_char);
				605
				606	done:
				607	Py_XDECREF(tmp);
				608	return result;
				609	}
				610	#endif /* defined FORMAT_LONG \|\| defined FORMAT_INT */
				611
				612	/************************************************************************/
				613	/********* float formatting *****************************************/
				614	/************************************************************************/
				615
				616	#ifdef FORMAT_FLOAT
				617	#if STRINGLIB_IS_UNICODE
				618	/* taken from unicodeobject.c */
				619	static Py_ssize_t
				620	strtounicode(Py_UNICODE buffer, const char charbuffer)
				621	{
				622	register Py_ssize_t i;
				623	Py_ssize_t len = strlen(charbuffer);
Eric Smith	8a803dd	2008-02-20 23:39:28 +0000	[diff] [blame]	624	for (i = len - 1; i >= 0; --i)
Eric Smith	a9f7d62	2008-02-17 19:46:49 +0000	[diff] [blame]	625	buffer[i] = (Py_UNICODE) charbuffer[i];
				626
				627	return len;
				628	}
				629	#endif
				630
Eric Smith	a9f7d62	2008-02-17 19:46:49 +0000	[diff] [blame]	631	/* see FORMATBUFLEN in unicodeobject.c */
				632	#define FLOAT_FORMATBUFLEN 120
				633
				634	/* much of this is taken from unicodeobject.c */
Eric Smith	a9f7d62	2008-02-17 19:46:49 +0000	[diff] [blame]	635	static PyObject *
Eric Smith	8a803dd	2008-02-20 23:39:28 +0000	[diff] [blame]	636	format_float_internal(PyObject *value,
				637	const InternalFormatSpec *format)
Eric Smith	a9f7d62	2008-02-17 19:46:49 +0000	[diff] [blame]	638	{
				639	/* fmt = '%.' + `prec` + `type` + '%%'
				640	worst case length = 2 + 10 (len of INT_MAX) + 1 + 2 = 15 (use 20)*/
				641	char fmt[20];
				642
				643	/* taken from unicodeobject.c */
				644	/* Worst case length calc to ensure no buffer overrun:
				645
				646	'g' formats:
				647	fmt = %#.<prec>g
				648	buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
				649	for any double rep.)
				650	len = 1 + prec + 1 + 2 + 5 = 9 + prec
				651
				652	'f' formats:
				653	buf = '-' + [0-9]x + '.' + [0-9]prec (with x < 50)
				654	len = 1 + 50 + 1 + prec = 52 + prec
				655
				656	If prec=0 the effective precision is 1 (the leading digit is
				657	always given), therefore increase the length by one.
				658
				659	*/
				660	char charbuf[FLOAT_FORMATBUFLEN];
				661	Py_ssize_t n_digits;
				662	double x;
				663	Py_ssize_t precision = format->precision;
				664	PyObject *result = NULL;
				665	STRINGLIB_CHAR sign;
				666	char* trailing = "";
				667	STRINGLIB_CHAR *p;
				668	NumberFieldWidths spec;
Eric Smith	8a803dd	2008-02-20 23:39:28 +0000	[diff] [blame]	669	STRINGLIB_CHAR type = format->type;
Eric Smith	a9f7d62	2008-02-17 19:46:49 +0000	[diff] [blame]	670
				671	#if STRINGLIB_IS_UNICODE
				672	Py_UNICODE unicodebuf[FLOAT_FORMATBUFLEN];
				673	#endif
				674
				675	/* first, do the conversion as 8-bit chars, using the platform's
				676	snprintf. then, if needed, convert to unicode. */
				677
				678	/* 'F' is the same as 'f', per the PEP */
				679	if (type == 'F')
				680	type = 'f';
				681
				682	x = PyFloat_AsDouble(value);
				683
				684	if (x == -1.0 && PyErr_Occurred())
				685	goto done;
				686
				687	if (type == '%') {
				688	type = 'f';
				689	x *= 100;
				690	trailing = "%";
				691	}
				692
				693	if (precision < 0)
				694	precision = 6;
				695	if (type == 'f' && (fabs(x) / 1e25) >= 1e25)
				696	type = 'g';
				697
				698	/* cast "type", because if we're in unicode we need to pass a
				699	8-bit char. this is safe, because we've restricted what "type"
				700	can be */
				701	PyOS_snprintf(fmt, sizeof(fmt), "%%.%" PY_FORMAT_SIZE_T "d%c", precision,
				702	(char)type);
				703
Eric Smith	8a803dd	2008-02-20 23:39:28 +0000	[diff] [blame]	704	/* do the actual formatting */
				705	PyOS_ascii_formatd(charbuf, sizeof(charbuf), fmt, x);
Eric Smith	a9f7d62	2008-02-17 19:46:49 +0000	[diff] [blame]	706
				707	/* adding trailing to fmt with PyOS_snprintf doesn't work, not
				708	sure why. we'll just concatentate it here, no harm done. we
				709	know we can't have a buffer overflow from the fmt size
				710	analysis */
				711	strcat(charbuf, trailing);
				712
				713	/* rather than duplicate the code for snprintf for both unicode
				714	and 8 bit strings, we just use the 8 bit version and then
				715	convert to unicode in a separate code path. that's probably
				716	the lesser of 2 evils. */
				717	#if STRINGLIB_IS_UNICODE
				718	n_digits = strtounicode(unicodebuf, charbuf);
				719	p = unicodebuf;
				720	#else
				721	/* compute the length. I believe this is done because the return
				722	value from snprintf above is unreliable */
				723	n_digits = strlen(charbuf);
				724	p = charbuf;
				725	#endif
				726
				727	/* is a sign character present in the output? if so, remember it
				728	and skip it */
				729	sign = p[0];
				730	if (sign == '-') {
Eric Smith	8a803dd	2008-02-20 23:39:28 +0000	[diff] [blame]	731	++p;
				732	--n_digits;
Eric Smith	a9f7d62	2008-02-17 19:46:49 +0000	[diff] [blame]	733	}
				734
				735	calc_number_widths(&spec, sign, n_digits, format);
				736
				737	/* allocate a string with enough space */
				738	result = STRINGLIB_NEW(NULL, spec.n_total);
				739	if (result == NULL)
				740	goto done;
				741
				742	/* fill in the non-digit parts */
				743	fill_number(STRINGLIB_STR(result), &spec, n_digits,
				744	format->fill_char == '\0' ? ' ' : format->fill_char);
				745
				746	/* fill in the digit parts */
				747	memmove(STRINGLIB_STR(result) +
				748	(spec.n_lpadding + spec.n_lsign + spec.n_spadding),
				749	p,
				750	n_digits * sizeof(STRINGLIB_CHAR));
				751
				752	done:
				753	return result;
				754	}
Eric Smith	a9f7d62	2008-02-17 19:46:49 +0000	[diff] [blame]	755	#endif /* FORMAT_FLOAT */
				756
				757	/************************************************************************/
				758	/********* built in formatters **************************************/
				759	/************************************************************************/
Eric Smith	a9f7d62	2008-02-17 19:46:49 +0000	[diff] [blame]	760	PyObject *
Eric Smith	dc13b79	2008-05-30 18:10:04 +0000	[diff] [blame]	761	FORMAT_STRING(PyObject *obj,
				762	STRINGLIB_CHAR *format_spec,
				763	Py_ssize_t format_spec_len)
Eric Smith	a9f7d62	2008-02-17 19:46:49 +0000	[diff] [blame]	764	{
Eric Smith	a9f7d62	2008-02-17 19:46:49 +0000	[diff] [blame]	765	InternalFormatSpec format;
Eric Smith	dc13b79	2008-05-30 18:10:04 +0000	[diff] [blame]	766	PyObject *result = NULL;
Eric Smith	a9f7d62	2008-02-17 19:46:49 +0000	[diff] [blame]	767
				768	/* check for the special case of zero length format spec, make
Eric Smith	dc13b79	2008-05-30 18:10:04 +0000	[diff] [blame]	769	it equivalent to str(obj) */
				770	if (format_spec_len == 0) {
				771	result = STRINGLIB_TOSTR(obj);
Eric Smith	a9f7d62	2008-02-17 19:46:49 +0000	[diff] [blame]	772	goto done;
				773	}
				774
Eric Smith	a9f7d62	2008-02-17 19:46:49 +0000	[diff] [blame]	775	/* parse the format_spec */
Eric Smith	dc13b79	2008-05-30 18:10:04 +0000	[diff] [blame]	776	if (!parse_internal_render_format_spec(format_spec, format_spec_len,
				777	&format, 's'))
Eric Smith	a9f7d62	2008-02-17 19:46:49 +0000	[diff] [blame]	778	goto done;
				779
				780	/* type conversion? */
				781	switch (format.type) {
				782	case 's':
				783	/* no type conversion needed, already a string. do the formatting */
Eric Smith	dc13b79	2008-05-30 18:10:04 +0000	[diff] [blame]	784	result = format_string_internal(obj, &format);
Eric Smith	a9f7d62	2008-02-17 19:46:49 +0000	[diff] [blame]	785	break;
				786	default:
				787	/* unknown */
Martin v. Löwis	d918e4e	2008-04-07 03:08:28 +0000	[diff] [blame]	788	#if STRINGLIB_IS_UNICODE
				789	/* If STRINGLIB_CHAR is Py_UNICODE, %c might be out-of-range,
				790	hence the two cases. If it is char, gcc complains that the
				791	condition below is always true, hence the ifdef. */
				792	if (format.type > 32 && format.type <128)
				793	#endif
				794	PyErr_Format(PyExc_ValueError, "Unknown conversion type %c",
				795	(char)format.type);
				796	#if STRINGLIB_IS_UNICODE
				797	else
				798	PyErr_Format(PyExc_ValueError, "Unknown conversion type '\\x%x'",
				799	(unsigned int)format.type);
				800	#endif
Eric Smith	a9f7d62	2008-02-17 19:46:49 +0000	[diff] [blame]	801	goto done;
				802	}
				803
				804	done:
Eric Smith	a9f7d62	2008-02-17 19:46:49 +0000	[diff] [blame]	805	return result;
				806	}
Eric Smith	a9f7d62	2008-02-17 19:46:49 +0000	[diff] [blame]	807
				808	#if defined FORMAT_LONG \|\| defined FORMAT_INT
				809	static PyObject*
Eric Smith	dc13b79	2008-05-30 18:10:04 +0000	[diff] [blame]	810	format_int_or_long(PyObject* obj,
				811	STRINGLIB_CHAR *format_spec,
				812	Py_ssize_t format_spec_len,
				813	IntOrLongToString tostring)
Eric Smith	a9f7d62	2008-02-17 19:46:49 +0000	[diff] [blame]	814	{
Eric Smith	a9f7d62	2008-02-17 19:46:49 +0000	[diff] [blame]	815	PyObject *result = NULL;
				816	PyObject *tmp = NULL;
				817	InternalFormatSpec format;
				818
Eric Smith	a9f7d62	2008-02-17 19:46:49 +0000	[diff] [blame]	819	/* check for the special case of zero length format spec, make
Eric Smith	dc13b79	2008-05-30 18:10:04 +0000	[diff] [blame]	820	it equivalent to str(obj) */
				821	if (format_spec_len == 0) {
				822	result = STRINGLIB_TOSTR(obj);
Eric Smith	a9f7d62	2008-02-17 19:46:49 +0000	[diff] [blame]	823	goto done;
				824	}
				825
				826	/* parse the format_spec */
Eric Smith	dc13b79	2008-05-30 18:10:04 +0000	[diff] [blame]	827	if (!parse_internal_render_format_spec(format_spec,
				828	format_spec_len,
				829	&format, 'd'))
Eric Smith	a9f7d62	2008-02-17 19:46:49 +0000	[diff] [blame]	830	goto done;
				831
				832	/* type conversion? */
				833	switch (format.type) {
				834	case 'b':
				835	case 'c':
				836	case 'd':
				837	case 'o':
				838	case 'x':
				839	case 'X':
Eric Smith	cf537ff	2008-05-11 19:52:48 +0000	[diff] [blame]	840	case 'n':
Eric Smith	a9f7d62	2008-02-17 19:46:49 +0000	[diff] [blame]	841	/* no type conversion needed, already an int (or long). do
				842	the formatting */
Eric Smith	dc13b79	2008-05-30 18:10:04 +0000	[diff] [blame]	843	result = format_int_or_long_internal(obj, &format, tostring);
Eric Smith	a9f7d62	2008-02-17 19:46:49 +0000	[diff] [blame]	844	break;
				845
				846	case 'e':
				847	case 'E':
				848	case 'f':
				849	case 'F':
				850	case 'g':
				851	case 'G':
Eric Smith	a9f7d62	2008-02-17 19:46:49 +0000	[diff] [blame]	852	case '%':
				853	/* convert to float */
Eric Smith	dc13b79	2008-05-30 18:10:04 +0000	[diff] [blame]	854	tmp = PyNumber_Float(obj);
Eric Smith	a9f7d62	2008-02-17 19:46:49 +0000	[diff] [blame]	855	if (tmp == NULL)
				856	goto done;
Eric Smith	dc13b79	2008-05-30 18:10:04 +0000	[diff] [blame]	857	result = format_float_internal(obj, &format);
Eric Smith	a9f7d62	2008-02-17 19:46:49 +0000	[diff] [blame]	858	break;
				859
				860	default:
				861	/* unknown */
				862	PyErr_Format(PyExc_ValueError, "Unknown conversion type %c",
				863	format.type);
				864	goto done;
				865	}
				866
				867	done:
				868	Py_XDECREF(tmp);
				869	return result;
				870	}
				871	#endif /* FORMAT_LONG || defined FORMAT_INT */
				872
				873	#ifdef FORMAT_LONG
				874	/* Need to define long_format as a function that will convert a long
				875	to a string. In 3.0, _PyLong_Format has the correct signature. In
				876	2.x, we need to fudge a few parameters */
				877	#if PY_VERSION_HEX >= 0x03000000
				878	#define long_format _PyLong_Format
				879	#else
				880	static PyObject*
				881	long_format(PyObject* value, int base)
				882	{
				883	/* Convert to base, don't add trailing 'L', and use the new octal
				884	format. We already know this is a long object */
				885	assert(PyLong_Check(value));
				886	/* convert to base, don't add 'L', and use the new octal format */
				887	return _PyLong_Format(value, base, 0, 1);
				888	}
				889	#endif
				890
				891	PyObject *
Eric Smith	dc13b79	2008-05-30 18:10:04 +0000	[diff] [blame]	892	FORMAT_LONG(PyObject *obj,
				893	STRINGLIB_CHAR *format_spec,
				894	Py_ssize_t format_spec_len)
Eric Smith	a9f7d62	2008-02-17 19:46:49 +0000	[diff] [blame]	895	{
Eric Smith	dc13b79	2008-05-30 18:10:04 +0000	[diff] [blame]	896	return format_int_or_long(obj, format_spec, format_spec_len,
				897	long_format);
Eric Smith	a9f7d62	2008-02-17 19:46:49 +0000	[diff] [blame]	898	}
				899	#endif /* FORMAT_LONG */
				900
				901	#ifdef FORMAT_INT
				902	/* this is only used for 2.x, not 3.0 */
				903	static PyObject*
				904	int_format(PyObject* value, int base)
				905	{
				906	/* Convert to base, and use the new octal format. We already
				907	know this is an int object */
				908	assert(PyInt_Check(value));
				909	return _PyInt_Format((PyIntObject*)value, base, 1);
				910	}
				911
				912	PyObject *
Eric Smith	dc13b79	2008-05-30 18:10:04 +0000	[diff] [blame]	913	FORMAT_INT(PyObject *obj,
				914	STRINGLIB_CHAR *format_spec,
				915	Py_ssize_t format_spec_len)
Eric Smith	a9f7d62	2008-02-17 19:46:49 +0000	[diff] [blame]	916	{
Eric Smith	dc13b79	2008-05-30 18:10:04 +0000	[diff] [blame]	917	return format_int_or_long(obj, format_spec, format_spec_len,
				918	int_format);
Eric Smith	a9f7d62	2008-02-17 19:46:49 +0000	[diff] [blame]	919	}
				920	#endif /* FORMAT_INT */
				921
				922	#ifdef FORMAT_FLOAT
				923	PyObject *
Eric Smith	dc13b79	2008-05-30 18:10:04 +0000	[diff] [blame]	924	FORMAT_FLOAT(PyObject *obj,
				925	STRINGLIB_CHAR *format_spec,
				926	Py_ssize_t format_spec_len)
Eric Smith	a9f7d62	2008-02-17 19:46:49 +0000	[diff] [blame]	927	{
Eric Smith	a9f7d62	2008-02-17 19:46:49 +0000	[diff] [blame]	928	PyObject *result = NULL;
				929	InternalFormatSpec format;
				930
Eric Smith	a9f7d62	2008-02-17 19:46:49 +0000	[diff] [blame]	931	/* check for the special case of zero length format spec, make
Eric Smith	dc13b79	2008-05-30 18:10:04 +0000	[diff] [blame]	932	it equivalent to str(obj) */
				933	if (format_spec_len == 0) {
				934	result = STRINGLIB_TOSTR(obj);
Eric Smith	a9f7d62	2008-02-17 19:46:49 +0000	[diff] [blame]	935	goto done;
				936	}
				937
				938	/* parse the format_spec */
Eric Smith	dc13b79	2008-05-30 18:10:04 +0000	[diff] [blame]	939	if (!parse_internal_render_format_spec(format_spec,
				940	format_spec_len,
				941	&format, '\0'))
Eric Smith	a9f7d62	2008-02-17 19:46:49 +0000	[diff] [blame]	942	goto done;
				943
				944	/* type conversion? */
				945	switch (format.type) {
Eric Smith	8113ca6	2008-03-17 11:01:01 +0000	[diff] [blame]	946	case '\0':
				947	/* 'Z' means like 'g', but with at least one decimal. See
				948	PyOS_ascii_formatd */
				949	format.type = 'Z';
				950	/* Deliberate fall through to the next case statement */
Eric Smith	a9f7d62	2008-02-17 19:46:49 +0000	[diff] [blame]	951	case 'e':
				952	case 'E':
				953	case 'f':
				954	case 'F':
				955	case 'g':
				956	case 'G':
				957	case 'n':
				958	case '%':
				959	/* no conversion, already a float. do the formatting */
Eric Smith	dc13b79	2008-05-30 18:10:04 +0000	[diff] [blame]	960	result = format_float_internal(obj, &format);
Eric Smith	a9f7d62	2008-02-17 19:46:49 +0000	[diff] [blame]	961	break;
				962
				963	default:
				964	/* unknown */
				965	PyErr_Format(PyExc_ValueError, "Unknown conversion type %c",
				966	format.type);
				967	goto done;
				968	}
				969
				970	done:
				971	return result;
				972	}
				973	#endif /* FORMAT_FLOAT */