Blame - Objects/stringlib/formatter.h - platform/external/python/cpython3

blob: e8e83f4b798a074b2e627cce2e846516e758b76f [file] [log] [blame]

/* Implements the string, long, and float formatters; that is,
   string.__format__, etc. */

/* Before including this file, you must include either:
   stringlib/unicodedefs.h
   stringlib/stringdefs.h

   Also, you should define the names:
   FORMAT_STRING
   FORMAT_LONG
   FORMAT_FLOAT
   to be whatever you want the public names of these functions to
   be.  These are the only non-static functions defined here.
*/
				15
/* When non-zero, '(' is accepted as a sign element in a format
   specifier, and negative numbers are then wrapped in parentheses
   rather than prefixed with '-'.  Disabled here. */
#define ALLOW_PARENS_FOR_SIGN 0
				17
Eric Smith	8c66326	2007-08-25 02:26:07 +0000	[diff] [blame]	18	/*
				19	get_integer consumes 0 or more decimal digit characters from an
				20	input string, updates *result with the corresponding positive
				21	integer, and returns the number of digits consumed.
				22
				23	returns -1 on error.
				24	*/
				25	static int
				26	get_integer(STRINGLIB_CHAR *ptr, STRINGLIB_CHAR end,
				27	Py_ssize_t *result)
				28	{
				29	Py_ssize_t accumulator, digitval, oldaccumulator;
				30	int numdigits;
				31	accumulator = numdigits = 0;
				32	for (;;(*ptr)++, numdigits++) {
				33	if (*ptr >= end)
				34	break;
				35	digitval = STRINGLIB_TODECIMAL(**ptr);
				36	if (digitval < 0)
				37	break;
				38	/*
				39	This trick was copied from old Unicode format code. It's cute,
				40	but would really suck on an old machine with a slow divide
				41	implementation. Fortunately, in the normal case we do not
				42	expect too many digits.
				43	*/
				44	oldaccumulator = accumulator;
				45	accumulator *= 10;
				46	if ((accumulator+10)/10 != oldaccumulator+1) {
				47	PyErr_Format(PyExc_ValueError,
				48	"Too many decimal digits in format string");
				49	return -1;
				50	}
				51	accumulator += digitval;
				52	}
				53	*result = accumulator;
				54	return numdigits;
				55	}
				56
				57	/************************************************************************/
				58	/********* standard format specifier parsing ************************/
				59	/************************************************************************/
				60
				61	/* returns true if this character is a specifier alignment token */
				62	Py_LOCAL_INLINE(int)
				63	is_alignment_token(STRINGLIB_CHAR c)
				64	{
				65	switch (c) {
				66	case '<': case '>': case '=': case '^':
				67	return 1;
				68	default:
				69	return 0;
				70	}
				71	}
				72
				73	/* returns true if this character is a sign element */
				74	Py_LOCAL_INLINE(int)
				75	is_sign_element(STRINGLIB_CHAR c)
				76	{
				77	switch (c) {
Eric Smith	b7f5ba1	2007-08-29 12:38:45 +0000	[diff] [blame]	78	case ' ': case '+': case '-':
Eric Smith	4430095	2007-08-29 12:43:12 +0000	[diff] [blame]	79	#if ALLOW_PARENS_FOR_SIGN
Eric Smith	b7f5ba1	2007-08-29 12:38:45 +0000	[diff] [blame]	80	case '(':
Eric Smith	4430095	2007-08-29 12:43:12 +0000	[diff] [blame]	81	#endif
Eric Smith	8c66326	2007-08-25 02:26:07 +0000	[diff] [blame]	82	return 1;
				83	default:
				84	return 0;
				85	}
				86	}
				87
				88
				89	typedef struct {
				90	STRINGLIB_CHAR fill_char;
				91	STRINGLIB_CHAR align;
				92	STRINGLIB_CHAR sign;
				93	Py_ssize_t width;
				94	Py_ssize_t precision;
				95	STRINGLIB_CHAR type;
				96	} InternalFormatSpec;
				97
				98	/*
				99	ptr points to the start of the format_spec, end points just past its end.
				100	fills in format with the parsed information.
				101	returns 1 on success, 0 on failure.
				102	if failure, sets the exception
				103	*/
				104	static int
				105	parse_internal_render_format_spec(PyObject *format_spec,
				106	InternalFormatSpec *format,
				107	char default_type)
				108	{
				109	STRINGLIB_CHAR *ptr = STRINGLIB_STR(format_spec);
				110	STRINGLIB_CHAR *end = ptr + STRINGLIB_LEN(format_spec);
				111
				112	/* end-ptr is used throughout this code to specify the length of
				113	the input string */
				114
				115	Py_ssize_t specified_width;
				116
				117	format->fill_char = '\0';
				118	format->align = '\0';
				119	format->sign = '\0';
				120	format->width = -1;
				121	format->precision = -1;
				122	format->type = default_type;
				123
				124	/* If the second char is an alignment token,
				125	then parse the fill char */
				126	if (end-ptr >= 2 && is_alignment_token(ptr[1])) {
				127	format->align = ptr[1];
				128	format->fill_char = ptr[0];
				129	ptr += 2;
Eric Smith	0cb431c	2007-08-28 01:07:27 +0000	[diff] [blame]	130	}
				131	else if (end-ptr >= 1 && is_alignment_token(ptr[0])) {
Eric Smith	8c66326	2007-08-25 02:26:07 +0000	[diff] [blame]	132	format->align = ptr[0];
Christian Heimes	c3f30c4	2008-02-22 16:37:40 +0000	[diff] [blame^]	133	++ptr;
Eric Smith	8c66326	2007-08-25 02:26:07 +0000	[diff] [blame]	134	}
				135
				136	/* Parse the various sign options */
				137	if (end-ptr >= 1 && is_sign_element(ptr[0])) {
				138	format->sign = ptr[0];
Christian Heimes	c3f30c4	2008-02-22 16:37:40 +0000	[diff] [blame^]	139	++ptr;
Eric Smith	b7f5ba1	2007-08-29 12:38:45 +0000	[diff] [blame]	140	#if ALLOW_PARENS_FOR_SIGN
Eric Smith	8c66326	2007-08-25 02:26:07 +0000	[diff] [blame]	141	if (end-ptr >= 1 && ptr[0] == ')') {
Christian Heimes	c3f30c4	2008-02-22 16:37:40 +0000	[diff] [blame^]	142	++ptr;
Eric Smith	8c66326	2007-08-25 02:26:07 +0000	[diff] [blame]	143	}
Eric Smith	b7f5ba1	2007-08-29 12:38:45 +0000	[diff] [blame]	144	#endif
Eric Smith	8c66326	2007-08-25 02:26:07 +0000	[diff] [blame]	145	}
				146
				147	/* The special case for 0-padding (backwards compat) */
Eric Smith	185e30c	2007-08-30 22:23:08 +0000	[diff] [blame]	148	if (format->fill_char == '\0' && end-ptr >= 1 && ptr[0] == '0') {
Eric Smith	8c66326	2007-08-25 02:26:07 +0000	[diff] [blame]	149	format->fill_char = '0';
				150	if (format->align == '\0') {
				151	format->align = '=';
				152	}
Christian Heimes	c3f30c4	2008-02-22 16:37:40 +0000	[diff] [blame^]	153	++ptr;
Eric Smith	8c66326	2007-08-25 02:26:07 +0000	[diff] [blame]	154	}
				155
				156	/* XXX add error checking */
				157	specified_width = get_integer(&ptr, end, &format->width);
				158
				159	/* if specified_width is 0, we didn't consume any characters for
				160	the width. in that case, reset the width to -1, because
				161	get_integer() will have set it to zero */
				162	if (specified_width == 0) {
				163	format->width = -1;
				164	}
				165
				166	/* Parse field precision */
				167	if (end-ptr && ptr[0] == '.') {
Christian Heimes	c3f30c4	2008-02-22 16:37:40 +0000	[diff] [blame^]	168	++ptr;
Eric Smith	8c66326	2007-08-25 02:26:07 +0000	[diff] [blame]	169
				170	/* XXX add error checking */
				171	specified_width = get_integer(&ptr, end, &format->precision);
				172
				173	/* not having a precision after a dot is an error */
				174	if (specified_width == 0) {
				175	PyErr_Format(PyExc_ValueError,
				176	"Format specifier missing precision");
				177	return 0;
				178	}
				179
				180	}
				181
				182	/* Finally, parse the type field */
				183
				184	if (end-ptr > 1) {
				185	/* invalid conversion spec */
				186	PyErr_Format(PyExc_ValueError, "Invalid conversion specification");
				187	return 0;
				188	}
				189
				190	if (end-ptr == 1) {
				191	format->type = ptr[0];
Christian Heimes	c3f30c4	2008-02-22 16:37:40 +0000	[diff] [blame^]	192	++ptr;
Eric Smith	8c66326	2007-08-25 02:26:07 +0000	[diff] [blame]	193	}
				194
				195	return 1;
				196	}
				197
Eric Smith	8fd3eba	2008-02-17 19:48:00 +0000	[diff] [blame]	198	#if defined FORMAT_FLOAT \|\| defined FORMAT_LONG
Eric Smith	8c66326	2007-08-25 02:26:07 +0000	[diff] [blame]	199	/************************************************************************/
				200	/********* common routines for numeric formatting *******************/
				201	/************************************************************************/
				202
				203	/* describes the layout for an integer, see the comment in
				204	_calc_integer_widths() for details */
				205	typedef struct {
				206	Py_ssize_t n_lpadding;
				207	Py_ssize_t n_spadding;
				208	Py_ssize_t n_rpadding;
				209	char lsign;
				210	Py_ssize_t n_lsign;
				211	char rsign;
				212	Py_ssize_t n_rsign;
				213	Py_ssize_t n_total; /* just a convenience, it's derivable from the
				214	other fields */
				215	} NumberFieldWidths;
				216
				217	/* not all fields of format are used. for example, precision is
				218	unused. should this take discrete params in order to be more clear
				219	about what it does? or is passing a single format parameter easier
				220	and more efficient enough to justify a little obfuscation? */
				221	static void
				222	calc_number_widths(NumberFieldWidths *r, STRINGLIB_CHAR actual_sign,
				223	Py_ssize_t n_digits, const InternalFormatSpec *format)
				224	{
				225	r->n_lpadding = 0;
				226	r->n_spadding = 0;
				227	r->n_rpadding = 0;
				228	r->lsign = '\0';
				229	r->n_lsign = 0;
				230	r->rsign = '\0';
				231	r->n_rsign = 0;
				232
				233	/* the output will look like:
				234	\| \|
				235	\| <lpadding> <lsign> <spadding> <digits> <rsign> <rpadding> \|
				236	\| \|
				237
				238	lsign and rsign are computed from format->sign and the actual
				239	sign of the number
				240
				241	digits is already known
				242
				243	the total width is either given, or computed from the
				244	actual digits
				245
				246	only one of lpadding, spadding, and rpadding can be non-zero,
				247	and it's calculated from the width and other fields
				248	*/
				249
				250	/* compute the various parts we're going to write */
				251	if (format->sign == '+') {
				252	/* always put a + or - */
				253	r->n_lsign = 1;
				254	r->lsign = (actual_sign == '-' ? '-' : '+');
Eric Smith	0cb431c	2007-08-28 01:07:27 +0000	[diff] [blame]	255	}
Eric Smith	b7f5ba1	2007-08-29 12:38:45 +0000	[diff] [blame]	256	#if ALLOW_PARENS_FOR_SIGN
Eric Smith	0cb431c	2007-08-28 01:07:27 +0000	[diff] [blame]	257	else if (format->sign == '(') {
Eric Smith	8c66326	2007-08-25 02:26:07 +0000	[diff] [blame]	258	if (actual_sign == '-') {
				259	r->n_lsign = 1;
				260	r->lsign = '(';
				261	r->n_rsign = 1;
				262	r->rsign = ')';
				263	}
Eric Smith	0cb431c	2007-08-28 01:07:27 +0000	[diff] [blame]	264	}
Eric Smith	b7f5ba1	2007-08-29 12:38:45 +0000	[diff] [blame]	265	#endif
Eric Smith	0cb431c	2007-08-28 01:07:27 +0000	[diff] [blame]	266	else if (format->sign == ' ') {
Eric Smith	8c66326	2007-08-25 02:26:07 +0000	[diff] [blame]	267	r->n_lsign = 1;
				268	r->lsign = (actual_sign == '-' ? '-' : ' ');
Eric Smith	0cb431c	2007-08-28 01:07:27 +0000	[diff] [blame]	269	}
				270	else {
Eric Smith	8c66326	2007-08-25 02:26:07 +0000	[diff] [blame]	271	/* non specified, or the default (-) */
				272	if (actual_sign == '-') {
				273	r->n_lsign = 1;
				274	r->lsign = '-';
				275	}
				276	}
				277
				278	/* now the number of padding characters */
				279	if (format->width == -1) {
				280	/* no padding at all, nothing to do */
Eric Smith	0cb431c	2007-08-28 01:07:27 +0000	[diff] [blame]	281	}
				282	else {
Eric Smith	8c66326	2007-08-25 02:26:07 +0000	[diff] [blame]	283	/* see if any padding is needed */
				284	if (r->n_lsign + n_digits + r->n_rsign >= format->width) {
				285	/* no padding needed, we're already bigger than the
				286	requested width */
Eric Smith	0cb431c	2007-08-28 01:07:27 +0000	[diff] [blame]	287	}
				288	else {
Eric Smith	8c66326	2007-08-25 02:26:07 +0000	[diff] [blame]	289	/* determine which of left, space, or right padding is
				290	needed */
Eric Smith	8fd3eba	2008-02-17 19:48:00 +0000	[diff] [blame]	291	Py_ssize_t padding = format->width -
				292	(r->n_lsign + n_digits + r->n_rsign);
Eric Smith	8c66326	2007-08-25 02:26:07 +0000	[diff] [blame]	293	if (format->align == '<')
				294	r->n_rpadding = padding;
				295	else if (format->align == '>')
				296	r->n_lpadding = padding;
				297	else if (format->align == '^') {
				298	r->n_lpadding = padding / 2;
				299	r->n_rpadding = padding - r->n_lpadding;
Eric Smith	0cb431c	2007-08-28 01:07:27 +0000	[diff] [blame]	300	}
Eric Smith	185e30c	2007-08-30 22:23:08 +0000	[diff] [blame]	301	else if (format->align == '=')
Eric Smith	8c66326	2007-08-25 02:26:07 +0000	[diff] [blame]	302	r->n_spadding = padding;
Eric Smith	185e30c	2007-08-30 22:23:08 +0000	[diff] [blame]	303	else
				304	r->n_lpadding = padding;
Eric Smith	8c66326	2007-08-25 02:26:07 +0000	[diff] [blame]	305	}
				306	}
				307	r->n_total = r->n_lpadding + r->n_lsign + r->n_spadding +
				308	n_digits + r->n_rsign + r->n_rpadding;
				309	}
				310
				311	/* fill in the non-digit parts of a numbers's string representation,
				312	as determined in _calc_integer_widths(). returns the pointer to
				313	where the digits go. */
				314	static STRINGLIB_CHAR *
				315	fill_number(STRINGLIB_CHAR p_buf, const NumberFieldWidths spec,
				316	Py_ssize_t n_digits, STRINGLIB_CHAR fill_char)
				317	{
				318	STRINGLIB_CHAR* p_digits;
				319
				320	if (spec->n_lpadding) {
				321	STRINGLIB_FILL(p_buf, fill_char, spec->n_lpadding);
				322	p_buf += spec->n_lpadding;
				323	}
				324	if (spec->n_lsign == 1) {
				325	*p_buf++ = spec->lsign;
				326	}
				327	if (spec->n_spadding) {
				328	STRINGLIB_FILL(p_buf, fill_char, spec->n_spadding);
				329	p_buf += spec->n_spadding;
				330	}
				331	p_digits = p_buf;
				332	p_buf += n_digits;
				333	if (spec->n_rsign == 1) {
				334	*p_buf++ = spec->rsign;
				335	}
				336	if (spec->n_rpadding) {
				337	STRINGLIB_FILL(p_buf, fill_char, spec->n_rpadding);
				338	p_buf += spec->n_rpadding;
				339	}
				340	return p_digits;
				341	}
Eric Smith	8fd3eba	2008-02-17 19:48:00 +0000	[diff] [blame]	342	#endif /* FORMAT_FLOAT \|\| FORMAT_LONG */
Eric Smith	8c66326	2007-08-25 02:26:07 +0000	[diff] [blame]	343
				344	/************************************************************************/
				345	/********* string formatting ****************************************/
				346	/************************************************************************/
				347
				348	static PyObject *
				349	format_string_internal(PyObject value, const InternalFormatSpec format)
				350	{
				351	Py_ssize_t width; /* total field width */
				352	Py_ssize_t lpad;
				353	STRINGLIB_CHAR *dst;
				354	STRINGLIB_CHAR *src = STRINGLIB_STR(value);
				355	Py_ssize_t len = STRINGLIB_LEN(value);
				356	PyObject *result = NULL;
				357
				358	/* sign is not allowed on strings */
				359	if (format->sign != '\0') {
				360	PyErr_SetString(PyExc_ValueError,
				361	"Sign not allowed in string format specifier");
				362	goto done;
				363	}
				364
				365	/* '=' alignment not allowed on strings */
				366	if (format->align == '=') {
				367	PyErr_SetString(PyExc_ValueError,
				368	"'=' alignment not allowed "
				369	"in string format specifier");
				370	goto done;
				371	}
				372
				373	/* if precision is specified, output no more that format.precision
				374	characters */
				375	if (format->precision >= 0 && len >= format->precision) {
				376	len = format->precision;
				377	}
				378
				379	if (format->width >= 0) {
				380	width = format->width;
				381
				382	/* but use at least len characters */
				383	if (len > width) {
				384	width = len;
				385	}
Eric Smith	0cb431c	2007-08-28 01:07:27 +0000	[diff] [blame]	386	}
				387	else {
Eric Smith	8c66326	2007-08-25 02:26:07 +0000	[diff] [blame]	388	/* not specified, use all of the chars and no more */
				389	width = len;
				390	}
				391
				392	/* allocate the resulting string */
				393	result = STRINGLIB_NEW(NULL, width);
				394	if (result == NULL)
				395	goto done;
				396
				397	/* now write into that space */
				398	dst = STRINGLIB_STR(result);
				399
				400	/* figure out how much leading space we need, based on the
				401	aligning */
				402	if (format->align == '>')
				403	lpad = width - len;
				404	else if (format->align == '^')
				405	lpad = (width - len) / 2;
				406	else
				407	lpad = 0;
				408
				409	/* if right aligning, increment the destination allow space on the
				410	left */
				411	memcpy(dst + lpad, src, len * sizeof(STRINGLIB_CHAR));
				412
				413	/* do any padding */
				414	if (width > len) {
				415	STRINGLIB_CHAR fill_char = format->fill_char;
				416	if (fill_char == '\0') {
				417	/* use the default, if not specified */
				418	fill_char = ' ';
				419	}
				420
				421	/* pad on left */
				422	if (lpad)
				423	STRINGLIB_FILL(dst, fill_char, lpad);
				424
				425	/* pad on right */
				426	if (width - len - lpad)
				427	STRINGLIB_FILL(dst + len + lpad, fill_char, width - len - lpad);
				428	}
				429
				430	done:
				431	return result;
				432	}
				433
				434
				435	/************************************************************************/
				436	/********* long formatting ******************************************/
				437	/************************************************************************/
				438
#if defined FORMAT_LONG || defined FORMAT_INT
/* Callback that converts an integer object to its string form in the
   given base.  The code below expects a "0b"/"0o"/"0x" prefix for
   bases 2, 8 and 16, and a leading '-' for negative values. */
typedef PyObject*
(*IntOrLongToString)(PyObject *value, int base);

/* Format the integer object value according to the already parsed
   format.  tostring performs the actual conversion to digits.

   Returns a new string object, or NULL with an exception set.
   Precision is rejected for every integer type; type 'c' additionally
   rejects a sign and requires the value to be a valid code point. */
static PyObject *
format_int_or_long_internal(PyObject *value, const InternalFormatSpec *format,
                            IntOrLongToString tostring)
{
    PyObject *result = NULL;
    PyObject *tmp = NULL;
    STRINGLIB_CHAR *pnumeric_chars;
    STRINGLIB_CHAR numeric_char;
    STRINGLIB_CHAR sign = '\0';
    STRINGLIB_CHAR *p;
    Py_ssize_t n_digits;       /* count of digits need from the computed
                                  string */
    Py_ssize_t n_leading_chars;
    NumberFieldWidths spec;
    long x;

    /* no precision allowed on integers */
    if (format->precision != -1) {
        PyErr_SetString(PyExc_ValueError,
                        "Precision not allowed in integer format specifier");
        goto done;
    }


    /* special case for character formatting */
    if (format->type == 'c') {
        /* error to specify a sign */
        if (format->sign != '\0') {
            PyErr_SetString(PyExc_ValueError,
                            "Sign not allowed with integer"
                            " format specifier 'c'");
            goto done;
        }

        /* taken from unicodeobject.c formatchar() */
        /* Integer input truncated to a character */
/* XXX: won't work for int */
        x = PyLong_AsLong(value);
        if (x == -1 && PyErr_Occurred())
            goto done;
#ifdef Py_UNICODE_WIDE
        if (x < 0 || x > 0x10ffff) {
            PyErr_SetString(PyExc_OverflowError,
                            "%c arg not in range(0x110000) "
                            "(wide Python build)");
            goto done;
        }
#else
        if (x < 0 || x > 0xffff) {
            PyErr_SetString(PyExc_OverflowError,
                            "%c arg not in range(0x10000) "
                            "(narrow Python build)");
            goto done;
        }
#endif
        /* the single character lives in a local; it is copied into the
           result below, before this function returns */
        numeric_char = (STRINGLIB_CHAR)x;
        pnumeric_chars = &numeric_char;
        n_digits = 1;
    }
    else {
        int base;
        int leading_chars_to_skip;  /* Number of characters added by
                                       PyNumber_ToBase that we want to
                                       skip over. */

        /* Compute the base and how many characters will be added by
           PyNumber_ToBase */
        switch (format->type) {
        case 'b':
            base = 2;
            leading_chars_to_skip = 2; /* 0b */
            break;
        case 'o':
            base = 8;
            leading_chars_to_skip = 2; /* 0o */
            break;
        case 'x':
        case 'X':
            base = 16;
            leading_chars_to_skip = 2; /* 0x */
            break;
        default:  /* shouldn't be needed, but stops a compiler warning */
        case 'd':
            base = 10;
            leading_chars_to_skip = 0;
            break;
        }

        /* Do the hard part, converting to a string in a given base */
        tmp = tostring(value, base);
        if (tmp == NULL)
            goto done;

        pnumeric_chars = STRINGLIB_STR(tmp);
        n_digits = STRINGLIB_LEN(tmp);

        /* Remember not to modify what pnumeric_chars points to.  it
           might be interned.  Only modify it after we copy it into a
           newly allocated output buffer. */

        /* Is a sign character present in the output?  If so, remember it
           and skip it */
        sign = pnumeric_chars[0];
        if (sign == '-') {
            ++leading_chars_to_skip;
        }

        /* Skip over the leading chars (0x, 0b, etc.) */
        n_digits -= leading_chars_to_skip;
        pnumeric_chars += leading_chars_to_skip;
    }

    /* Calculate the widths of the various leading and trailing parts */
    calc_number_widths(&spec, sign, n_digits, format);

    /* Allocate a new string to hold the result */
    result = STRINGLIB_NEW(NULL, spec.n_total);
    if (!result)
        goto done;
    p = STRINGLIB_STR(result);

    /* Fill in the digit parts */
    n_leading_chars = spec.n_lpadding + spec.n_lsign + spec.n_spadding;
    memmove(p + n_leading_chars,
            pnumeric_chars,
            n_digits * sizeof(STRINGLIB_CHAR));

    /* if X, convert to uppercase */
    if (format->type == 'X') {
        Py_ssize_t t;
        for (t = 0; t < n_digits; ++t)
            p[t + n_leading_chars] = STRINGLIB_TOUPPER(p[t + n_leading_chars]);
    }

    /* Fill in the non-digit parts */
    fill_number(p, &spec, n_digits,
                format->fill_char == '\0' ? ' ' : format->fill_char);

done:
    Py_XDECREF(tmp);
    return result;
}
#endif /* defined FORMAT_LONG || defined FORMAT_INT */
Eric Smith	8c66326	2007-08-25 02:26:07 +0000	[diff] [blame]	586
				587	/************************************************************************/
				588	/********* float formatting *****************************************/
				589	/************************************************************************/
				590
Eric Smith	8fd3eba	2008-02-17 19:48:00 +0000	[diff] [blame]	591	#ifdef FORMAT_FLOAT
				592	#if STRINGLIB_IS_UNICODE
Eric Smith	8c66326	2007-08-25 02:26:07 +0000	[diff] [blame]	593	/* taken from unicodeobject.c */
				594	static Py_ssize_t
				595	strtounicode(Py_UNICODE buffer, const char charbuffer)
				596	{
				597	register Py_ssize_t i;
				598	Py_ssize_t len = strlen(charbuffer);
Christian Heimes	c3f30c4	2008-02-22 16:37:40 +0000	[diff] [blame^]	599	for (i = len - 1; i >= 0; --i)
Eric Smith	185e30c	2007-08-30 22:23:08 +0000	[diff] [blame]	600	buffer[i] = (Py_UNICODE) charbuffer[i];
Eric Smith	8c66326	2007-08-25 02:26:07 +0000	[diff] [blame]	601
				602	return len;
				603	}
Eric Smith	8fd3eba	2008-02-17 19:48:00 +0000	[diff] [blame]	604	#endif
Eric Smith	8c66326	2007-08-25 02:26:07 +0000	[diff] [blame]	605
Eric Smith	8c66326	2007-08-25 02:26:07 +0000	[diff] [blame]	606	/* see FORMATBUFLEN in unicodeobject.c */
				607	#define FLOAT_FORMATBUFLEN 120
				608
				609	/* much of this is taken from unicodeobject.c */
Eric Smith	8c66326	2007-08-25 02:26:07 +0000	[diff] [blame]	610	static PyObject *
Christian Heimes	c3f30c4	2008-02-22 16:37:40 +0000	[diff] [blame^]	611	format_float_internal(PyObject *value,
				612	const InternalFormatSpec *format)
Eric Smith	8c66326	2007-08-25 02:26:07 +0000	[diff] [blame]	613	{
				614	/* fmt = '%.' + `prec` + `type` + '%%'
				615	worst case length = 2 + 10 (len of INT_MAX) + 1 + 2 = 15 (use 20)*/
				616	char fmt[20];
				617
				618	/* taken from unicodeobject.c */
				619	/* Worst case length calc to ensure no buffer overrun:
				620
				621	'g' formats:
Eric Smith	185e30c	2007-08-30 22:23:08 +0000	[diff] [blame]	622	fmt = %#.<prec>g
				623	buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
				624	for any double rep.)
				625	len = 1 + prec + 1 + 2 + 5 = 9 + prec
Eric Smith	8c66326	2007-08-25 02:26:07 +0000	[diff] [blame]	626
				627	'f' formats:
Eric Smith	185e30c	2007-08-30 22:23:08 +0000	[diff] [blame]	628	buf = '-' + [0-9]x + '.' + [0-9]prec (with x < 50)
				629	len = 1 + 50 + 1 + prec = 52 + prec
Eric Smith	8c66326	2007-08-25 02:26:07 +0000	[diff] [blame]	630
				631	If prec=0 the effective precision is 1 (the leading digit is
				632	always given), therefore increase the length by one.
				633
				634	*/
				635	char charbuf[FLOAT_FORMATBUFLEN];
				636	Py_ssize_t n_digits;
				637	double x;
				638	Py_ssize_t precision = format->precision;
				639	PyObject *result = NULL;
				640	STRINGLIB_CHAR sign;
				641	char* trailing = "";
				642	STRINGLIB_CHAR *p;
				643	NumberFieldWidths spec;
Christian Heimes	c3f30c4	2008-02-22 16:37:40 +0000	[diff] [blame^]	644	STRINGLIB_CHAR type = format->type;
Eric Smith	8c66326	2007-08-25 02:26:07 +0000	[diff] [blame]	645
				646	#if STRINGLIB_IS_UNICODE
				647	Py_UNICODE unicodebuf[FLOAT_FORMATBUFLEN];
				648	#endif
				649
				650	/* first, do the conversion as 8-bit chars, using the platform's
				651	snprintf. then, if needed, convert to unicode. */
				652
				653	/* 'F' is the same as 'f', per the PEP */
				654	if (type == 'F')
				655	type = 'f';
				656
				657	x = PyFloat_AsDouble(value);
				658
				659	if (x == -1.0 && PyErr_Occurred())
Eric Smith	185e30c	2007-08-30 22:23:08 +0000	[diff] [blame]	660	goto done;
Eric Smith	8c66326	2007-08-25 02:26:07 +0000	[diff] [blame]	661
				662	if (type == '%') {
				663	type = 'f';
				664	x *= 100;
				665	trailing = "%";
				666	}
				667
				668	if (precision < 0)
Eric Smith	185e30c	2007-08-30 22:23:08 +0000	[diff] [blame]	669	precision = 6;
Eric Smith	8c66326	2007-08-25 02:26:07 +0000	[diff] [blame]	670	if (type == 'f' && (fabs(x) / 1e25) >= 1e25)
Eric Smith	185e30c	2007-08-30 22:23:08 +0000	[diff] [blame]	671	type = 'g';
Eric Smith	8c66326	2007-08-25 02:26:07 +0000	[diff] [blame]	672
				673	/* cast "type", because if we're in unicode we need to pass a
				674	8-bit char. this is safe, because we've restricted what "type"
				675	can be */
Eric Smith	8fd3eba	2008-02-17 19:48:00 +0000	[diff] [blame]	676	PyOS_snprintf(fmt, sizeof(fmt), "%%.%" PY_FORMAT_SIZE_T "d%c", precision,
				677	(char)type);
Eric Smith	8c66326	2007-08-25 02:26:07 +0000	[diff] [blame]	678
Christian Heimes	c3f30c4	2008-02-22 16:37:40 +0000	[diff] [blame^]	679	/* do the actual formatting */
				680	PyOS_ascii_formatd(charbuf, sizeof(charbuf), fmt, x);
Eric Smith	8c66326	2007-08-25 02:26:07 +0000	[diff] [blame]	681
				682	/* adding trailing to fmt with PyOS_snprintf doesn't work, not
				683	sure why. we'll just concatentate it here, no harm done. we
				684	know we can't have a buffer overflow from the fmt size
				685	analysis */
				686	strcat(charbuf, trailing);
				687
				688	/* rather than duplicate the code for snprintf for both unicode
				689	and 8 bit strings, we just use the 8 bit version and then
				690	convert to unicode in a separate code path. that's probably
				691	the lesser of 2 evils. */
				692	#if STRINGLIB_IS_UNICODE
				693	n_digits = strtounicode(unicodebuf, charbuf);
				694	p = unicodebuf;
				695	#else
				696	/* compute the length. I believe this is done because the return
				697	value from snprintf above is unreliable */
				698	n_digits = strlen(charbuf);
				699	p = charbuf;
				700	#endif
				701
				702	/* is a sign character present in the output? if so, remember it
				703	and skip it */
				704	sign = p[0];
				705	if (sign == '-') {
Christian Heimes	c3f30c4	2008-02-22 16:37:40 +0000	[diff] [blame^]	706	++p;
				707	--n_digits;
Eric Smith	8c66326	2007-08-25 02:26:07 +0000	[diff] [blame]	708	}
				709
				710	calc_number_widths(&spec, sign, n_digits, format);
				711
				712	/* allocate a string with enough space */
				713	result = STRINGLIB_NEW(NULL, spec.n_total);
				714	if (result == NULL)
				715	goto done;
				716
				717	/* fill in the non-digit parts */
				718	fill_number(STRINGLIB_STR(result), &spec, n_digits,
				719	format->fill_char == '\0' ? ' ' : format->fill_char);
				720
				721	/* fill in the digit parts */
Eric Smith	8fd3eba	2008-02-17 19:48:00 +0000	[diff] [blame]	722	memmove(STRINGLIB_STR(result) +
				723	(spec.n_lpadding + spec.n_lsign + spec.n_spadding),
Eric Smith	8c66326	2007-08-25 02:26:07 +0000	[diff] [blame]	724	p,
				725	n_digits * sizeof(STRINGLIB_CHAR));
				726
				727	done:
				728	return result;
				729	}
Eric Smith	8fd3eba	2008-02-17 19:48:00 +0000	[diff] [blame]	730	#endif /* FORMAT_FLOAT */
Eric Smith	8c66326	2007-08-25 02:26:07 +0000	[diff] [blame]	731
				732	/************************************************************************/
				733	/********* built in formatters **************************************/
				734	/************************************************************************/
Eric Smith	8fd3eba	2008-02-17 19:48:00 +0000	[diff] [blame]	735	#ifdef FORMAT_STRING
Eric Smith	8c66326	2007-08-25 02:26:07 +0000	[diff] [blame]	736	PyObject *
				737	FORMAT_STRING(PyObject* value, PyObject* args)
				738	{
				739	PyObject *format_spec;
Eric Smith	8c66326	2007-08-25 02:26:07 +0000	[diff] [blame]	740	PyObject *result = NULL;
Eric Smith	8fd3eba	2008-02-17 19:48:00 +0000	[diff] [blame]	741	#if PY_VERSION_HEX < 0x03000000
				742	PyObject *tmp = NULL;
				743	#endif
Eric Smith	8c66326	2007-08-25 02:26:07 +0000	[diff] [blame]	744	InternalFormatSpec format;
				745
Eric Smith	8fd3eba	2008-02-17 19:48:00 +0000	[diff] [blame]	746	/* If 2.x, we accept either str or unicode, and try to convert it
				747	to the right type. In 3.x, we insist on only unicode */
				748	#if PY_VERSION_HEX >= 0x03000000
				749	if (!PyArg_ParseTuple(args, STRINGLIB_PARSE_CODE ":__format__",
				750	&format_spec))
Eric Smith	8c66326	2007-08-25 02:26:07 +0000	[diff] [blame]	751	goto done;
Eric Smith	8fd3eba	2008-02-17 19:48:00 +0000	[diff] [blame]	752	#else
				753	/* If 2.x, convert format_spec to the same type as value */
				754	/* This is to allow things like u''.format('') */
				755	if (!PyArg_ParseTuple(args, "O:__format__", &format_spec))
				756	goto done;
				757	if (!(PyString_Check(format_spec) \|\| PyUnicode_Check(format_spec))) {
				758	PyErr_Format(PyExc_TypeError, "__format__ arg must be str "
				759	"or unicode, not %s", Py_TYPE(format_spec)->tp_name);
				760	goto done;
				761	}
				762	tmp = STRINGLIB_TOSTR(format_spec);
				763	if (tmp == NULL)
				764	goto done;
				765	format_spec = tmp;
				766	#endif
Eric Smith	8c66326	2007-08-25 02:26:07 +0000	[diff] [blame]	767
				768	/* check for the special case of zero length format spec, make
				769	it equivalent to str(value) */
				770	if (STRINGLIB_LEN(format_spec) == 0) {
				771	result = STRINGLIB_TOSTR(value);
				772	goto done;
				773	}
				774
Eric Smith	8fd3eba	2008-02-17 19:48:00 +0000	[diff] [blame]	775
Eric Smith	8c66326	2007-08-25 02:26:07 +0000	[diff] [blame]	776	/* parse the format_spec */
				777	if (!parse_internal_render_format_spec(format_spec, &format, 's'))
				778	goto done;
				779
				780	/* type conversion? */
				781	switch (format.type) {
				782	case 's':
				783	/* no type conversion needed, already a string. do the formatting */
				784	result = format_string_internal(value, &format);
				785	break;
Eric Smith	8c66326	2007-08-25 02:26:07 +0000	[diff] [blame]	786	default:
				787	/* unknown */
				788	PyErr_Format(PyExc_ValueError, "Unknown conversion type %c",
				789	format.type);
				790	goto done;
				791	}
				792
				793	done:
Eric Smith	8fd3eba	2008-02-17 19:48:00 +0000	[diff] [blame]	794	#if PY_VERSION_HEX < 0x03000000
				795	Py_XDECREF(tmp);
				796	#endif
Eric Smith	8c66326	2007-08-25 02:26:07 +0000	[diff] [blame]	797	return result;
				798	}
Eric Smith	8fd3eba	2008-02-17 19:48:00 +0000	[diff] [blame]	799	#endif /* FORMAT_STRING */
Eric Smith	8c66326	2007-08-25 02:26:07 +0000	[diff] [blame]	800
#if defined FORMAT_LONG || defined FORMAT_INT
/* Shared implementation of __format__ for int and long objects.

   value is the integer object being formatted.  args is the argument
   tuple handed to __format__; it is parsed for a single format
   specifier.  tostring is the callback that format_int_or_long_internal
   uses to render the integer as a string.

   Integer presentation types ('b', 'c', 'd', 'o', 'x', 'X') are handled
   by format_int_or_long_internal.  Float presentation types ('e', 'E',
   'f', 'F', 'g', 'G', 'n', '%') first convert the value with
   PyNumber_Float and then format the resulting float.

   Returns the formatted object, or NULL once an error has been raised. */
static PyObject*
format_int_or_long(PyObject* value, PyObject* args, IntOrLongToString tostring)
{
    PyObject *format_spec;
    PyObject *result = NULL;
    /* float conversion of value, when one is needed; released at "done" */
    PyObject *tmp = NULL;
    InternalFormatSpec format;

    if (!PyArg_ParseTuple(args, STRINGLIB_PARSE_CODE ":__format__",
                          &format_spec))
        goto done;

    /* check for the special case of zero length format spec, make
       it equivalent to str(value) */
    if (STRINGLIB_LEN(format_spec) == 0) {
        result = STRINGLIB_TOSTR(value);
        goto done;
    }

    /* parse the format_spec; 'd' is the type used when none is given */
    if (!parse_internal_render_format_spec(format_spec, &format, 'd'))
        goto done;

    /* type conversion? */
    switch (format.type) {
    case 'b':
    case 'c':
    case 'd':
    case 'o':
    case 'x':
    case 'X':
        /* no type conversion needed, already an int (or long).  do
           the formatting */
        result = format_int_or_long_internal(value, &format, tostring);
        break;

    case 'e':
    case 'E':
    case 'f':
    case 'F':
    case 'g':
    case 'G':
    case 'n':
    case '%':
        /* convert to float */
        tmp = PyNumber_Float(value);
        if (tmp == NULL)
            goto done;
        /* format the converted float, not the original integer */
        result = format_float_internal(tmp, &format);
        break;

    default:
        /* unknown */
        PyErr_Format(PyExc_ValueError, "Unknown conversion type %c",
                     format.type);
        goto done;
    }

done:
    Py_XDECREF(tmp);
    return result;
}
#endif /* FORMAT_LONG || defined FORMAT_INT */
Eric Smith	8c66326	2007-08-25 02:26:07 +0000	[diff] [blame]	865
#ifdef FORMAT_LONG
/* format_int_or_long needs a callback that turns a long into a string
   in a given base; long_format is that callback.  On 2.x it is a small
   adapter that supplies the extra _PyLong_Format arguments; on 3.0
   _PyLong_Format already has the right signature and is used directly. */
#if PY_VERSION_HEX < 0x03000000
static PyObject *
long_format(PyObject *value, int base)
{
    /* don't add a trailing 'L', and use the new octal format */
    const int add_l_suffix = 0;
    const int new_style_octal = 1;

    /* the caller guarantees this is a long object */
    assert(PyLong_Check(value));

    return _PyLong_Format(value, base, add_l_suffix, new_style_octal);
}
#else
#define long_format _PyLong_Format
#endif

/* Public __format__ entry point for long objects: hands value and args
   to the shared integer formatter together with the long-to-string
   callback, and returns whatever that formatter returns. */
PyObject *
FORMAT_LONG(PyObject *value, PyObject *args)
{
    PyObject *formatted;

    formatted = format_int_or_long(value, args, long_format);
    return formatted;
}
#endif /* FORMAT_LONG */
				890
				#ifdef FORMAT_INT
/* Callback handed to format_int_or_long for rendering an int as a string
   in the requested base.  This is only used for 2.x, not 3.0. */
static PyObject *
int_format(PyObject *value, int base)
{
    /* use the new octal format */
    const int new_style_octal = 1;
    PyIntObject *as_int;

    /* the caller guarantees this is an int object */
    assert(PyInt_Check(value));

    as_int = (PyIntObject *)value;
    return _PyInt_Format(as_int, base, new_style_octal);
}

/* Public __format__ entry point for int objects: hands value and args
   to the shared integer formatter together with the int-to-string
   callback, and returns whatever that formatter returns. */
PyObject *
FORMAT_INT(PyObject *value, PyObject *args)
{
    PyObject *formatted;

    formatted = format_int_or_long(value, args, int_format);
    return formatted;
}
#endif /* FORMAT_INT */
				908
				909	#ifdef FORMAT_FLOAT
Eric Smith	8c66326	2007-08-25 02:26:07 +0000	[diff] [blame]	910	PyObject *
				911	FORMAT_FLOAT(PyObject value, PyObject args)
				912	{
				913	PyObject *format_spec;
				914	PyObject *result = NULL;
Eric Smith	8c66326	2007-08-25 02:26:07 +0000	[diff] [blame]	915	InternalFormatSpec format;
				916
Eric Smith	37f1038	2007-09-01 10:56:01 +0000	[diff] [blame]	917	if (!PyArg_ParseTuple(args, STRINGLIB_PARSE_CODE ":__format__", &format_spec))
Eric Smith	8c66326	2007-08-25 02:26:07 +0000	[diff] [blame]	918	goto done;
Eric Smith	8c66326	2007-08-25 02:26:07 +0000	[diff] [blame]	919
				920	/* check for the special case of zero length format spec, make
				921	it equivalent to str(value) */
				922	if (STRINGLIB_LEN(format_spec) == 0) {
				923	result = STRINGLIB_TOSTR(value);
				924	goto done;
				925	}
				926
				927	/* parse the format_spec */
				928	if (!parse_internal_render_format_spec(format_spec, &format, 'g'))
				929	goto done;
				930
				931	/* type conversion? */
				932	switch (format.type) {
Eric Smith	8c66326	2007-08-25 02:26:07 +0000	[diff] [blame]	933	case 'e':
				934	case 'E':
				935	case 'f':
				936	case 'F':
				937	case 'g':
				938	case 'G':
				939	case 'n':
				940	case '%':
				941	/* no conversion, already a float. do the formatting */
				942	result = format_float_internal(value, &format);
				943	break;
				944
				945	default:
				946	/* unknown */
				947	PyErr_Format(PyExc_ValueError, "Unknown conversion type %c",
				948	format.type);
				949	goto done;
				950	}
				951
				952	done:
Eric Smith	8c66326	2007-08-25 02:26:07 +0000	[diff] [blame]	953	return result;
				954	}
Eric Smith	8fd3eba	2008-02-17 19:48:00 +0000	[diff] [blame]	955	#endif /* FORMAT_FLOAT */