Blame - Python/formatter_unicode.c - platform/external/python/cpython3

blob: 037880068a7d63014907d9f2c77143c1ab4716f9 [file] [log] [blame]

Eric Smith	8c66326	2007-08-25 02:26:07 +0000	[diff] [blame]	1	/* implements the unicode (as opposed to string) version of the
				2	built-in formatters for string, int, float. that is, the versions
				3	of int.__float__, etc., that take and return unicode objects */
				4
				5	#include "Python.h"
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	6	#include <locale.h>
				7
				8	/* Raises an exception about an unknown presentation type for this
				9	* type. */
				10
				11	static void
				12	unknown_presentation_type(Py_UCS4 presentation_type,
				13	const char* type_name)
				14	{
				15	/* %c might be out-of-range, hence the two cases. */
				16	if (presentation_type > 32 && presentation_type < 128)
				17	PyErr_Format(PyExc_ValueError,
				18	"Unknown format code '%c' "
				19	"for object of type '%.200s'",
				20	(char)presentation_type,
				21	type_name);
				22	else
				23	PyErr_Format(PyExc_ValueError,
				24	"Unknown format code '\\x%x' "
				25	"for object of type '%.200s'",
				26	(unsigned int)presentation_type,
				27	type_name);
				28	}
				29
				30	static void
				31	invalid_comma_type(Py_UCS4 presentation_type)
				32	{
				33	if (presentation_type > 32 && presentation_type < 128)
				34	PyErr_Format(PyExc_ValueError,
				35	"Cannot specify ',' with '%c'.",
				36	(char)presentation_type);
				37	else
				38	PyErr_Format(PyExc_ValueError,
				39	"Cannot specify ',' with '\\x%x'.",
				40	(unsigned int)presentation_type);
				41	}
				42
				43	/*
				44	get_integer consumes 0 or more decimal digit characters from an
				45	input string, updates *result with the corresponding positive
				46	integer, and returns the number of digits consumed.
				47
				48	returns -1 on error.
				49	*/
				50	static int
				51	get_integer(PyObject str, Py_ssize_t pos, Py_ssize_t end,
				52	Py_ssize_t *result)
				53	{
				54	Py_ssize_t accumulator, digitval, oldaccumulator;
				55	int numdigits;
				56	accumulator = numdigits = 0;
				57	for (;;(*pos)++, numdigits++) {
				58	if (*pos >= end)
				59	break;
				60	digitval = Py_UNICODE_TODECIMAL(PyUnicode_READ_CHAR(str, *pos));
				61	if (digitval < 0)
				62	break;
				63	/*
				64	This trick was copied from old Unicode format code. It's cute,
				65	but would really suck on an old machine with a slow divide
				66	implementation. Fortunately, in the normal case we do not
				67	expect too many digits.
				68	*/
				69	oldaccumulator = accumulator;
				70	accumulator *= 10;
				71	if ((accumulator+10)/10 != oldaccumulator+1) {
				72	PyErr_Format(PyExc_ValueError,
				73	"Too many decimal digits in format string");
				74	return -1;
				75	}
				76	accumulator += digitval;
				77	}
				78	*result = accumulator;
				79	return numdigits;
				80	}
				81
				82	/************************************************************************/
				83	/********* standard format specifier parsing ************************/
				84	/************************************************************************/
				85
				86	/* returns true if this character is a specifier alignment token */
				87	Py_LOCAL_INLINE(int)
				88	is_alignment_token(Py_UCS4 c)
				89	{
				90	switch (c) {
				91	case '<': case '>': case '=': case '^':
				92	return 1;
				93	default:
				94	return 0;
				95	}
				96	}
				97
				98	/* returns true if this character is a sign element */
				99	Py_LOCAL_INLINE(int)
				100	is_sign_element(Py_UCS4 c)
				101	{
				102	switch (c) {
				103	case ' ': case '+': case '-':
				104	return 1;
				105	default:
				106	return 0;
				107	}
				108	}
Eric Smith	8c66326	2007-08-25 02:26:07 +0000	[diff] [blame]	109
Eric Smith	4a7d76d	2008-05-30 18:10:19 +0000	[diff] [blame]	110
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	111	typedef struct {
				112	Py_UCS4 fill_char;
				113	Py_UCS4 align;
				114	int alternate;
				115	Py_UCS4 sign;
				116	Py_ssize_t width;
				117	int thousands_separators;
				118	Py_ssize_t precision;
				119	Py_UCS4 type;
				120	} InternalFormatSpec;
Eric Smith	4a7d76d	2008-05-30 18:10:19 +0000	[diff] [blame]	121
#if 0
/* Occasionally useful for debugging.  Should normally be compiled out. */
/* Print every field of *format to stdout, one field per line.  The
   Py_UCS4 fields are cast explicitly so that each argument matches its
   printf conversion. */
static void
DEBUG_PRINT_FORMAT_SPEC(InternalFormatSpec *format)
{
    printf("internal format spec: fill_char %u\n",
           (unsigned int)format->fill_char);
    printf("internal format spec: align %u\n",
           (unsigned int)format->align);
    printf("internal format spec: alternate %d\n", format->alternate);
    printf("internal format spec: sign %u\n",
           (unsigned int)format->sign);
    printf("internal format spec: width %zd\n", format->width);
    printf("internal format spec: thousands_separators %d\n",
           format->thousands_separators);
    printf("internal format spec: precision %zd\n", format->precision);
    printf("internal format spec: type %c\n", (int)format->type);
    printf("\n");
}
#endif
				139
				140
				141	/*
				142	ptr points to the start of the format_spec, end points just past its end.
				143	fills in format with the parsed information.
				144	returns 1 on success, 0 on failure.
				145	if failure, sets the exception
				146	*/
				147	static int
				148	parse_internal_render_format_spec(PyObject *format_spec,
				149	Py_ssize_t start, Py_ssize_t end,
				150	InternalFormatSpec *format,
				151	char default_type,
				152	char default_align)
				153	{
				154	Py_ssize_t pos = start;
				155	/* end-pos is used throughout this code to specify the length of
				156	the input string */
				157	#define READ_spec(index) PyUnicode_READ_CHAR(format_spec, index)
				158
				159	Py_ssize_t consumed;
				160	int align_specified = 0;
				161
				162	format->fill_char = '\0';
				163	format->align = default_align;
				164	format->alternate = 0;
				165	format->sign = '\0';
				166	format->width = -1;
				167	format->thousands_separators = 0;
				168	format->precision = -1;
				169	format->type = default_type;
				170
				171	/* If the second char is an alignment token,
				172	then parse the fill char */
				173	if (end-pos >= 2 && is_alignment_token(READ_spec(pos+1))) {
				174	format->align = READ_spec(pos+1);
				175	format->fill_char = READ_spec(pos);
				176	align_specified = 1;
				177	pos += 2;
				178	}
				179	else if (end-pos >= 1 && is_alignment_token(READ_spec(pos))) {
				180	format->align = READ_spec(pos);
				181	align_specified = 1;
				182	++pos;
				183	}
				184
				185	/* Parse the various sign options */
				186	if (end-pos >= 1 && is_sign_element(READ_spec(pos))) {
				187	format->sign = READ_spec(pos);
				188	++pos;
				189	}
				190
				191	/* If the next character is #, we're in alternate mode. This only
				192	applies to integers. */
				193	if (end-pos >= 1 && READ_spec(pos) == '#') {
				194	format->alternate = 1;
				195	++pos;
				196	}
				197
				198	/* The special case for 0-padding (backwards compat) */
				199	if (format->fill_char == '\0' && end-pos >= 1 && READ_spec(pos) == '0') {
				200	format->fill_char = '0';
				201	if (!align_specified) {
				202	format->align = '=';
				203	}
				204	++pos;
				205	}
				206
				207	consumed = get_integer(format_spec, &pos, end, &format->width);
				208	if (consumed == -1)
				209	/* Overflow error. Exception already set. */
				210	return 0;
				211
				212	/* If consumed is 0, we didn't consume any characters for the
				213	width. In that case, reset the width to -1, because
				214	get_integer() will have set it to zero. -1 is how we record
				215	that the width wasn't specified. */
				216	if (consumed == 0)
				217	format->width = -1;
				218
				219	/* Comma signifies add thousands separators */
				220	if (end-pos && READ_spec(pos) == ',') {
				221	format->thousands_separators = 1;
				222	++pos;
				223	}
				224
				225	/* Parse field precision */
				226	if (end-pos && READ_spec(pos) == '.') {
				227	++pos;
				228
				229	consumed = get_integer(format_spec, &pos, end, &format->precision);
				230	if (consumed == -1)
				231	/* Overflow error. Exception already set. */
				232	return 0;
				233
				234	/* Not having a precision after a dot is an error. */
				235	if (consumed == 0) {
				236	PyErr_Format(PyExc_ValueError,
				237	"Format specifier missing precision");
				238	return 0;
				239	}
				240
				241	}
				242
				243	/* Finally, parse the type field. */
				244
				245	if (end-pos > 1) {
				246	/* More than one char remain, invalid conversion spec. */
				247	PyErr_Format(PyExc_ValueError, "Invalid conversion specification");
				248	return 0;
				249	}
				250
				251	if (end-pos == 1) {
				252	format->type = READ_spec(pos);
				253	++pos;
				254	}
				255
				256	/* Do as much validating as we can, just by looking at the format
				257	specifier. Do not take into account what type of formatting
				258	we're doing (int, float, string). */
				259
				260	if (format->thousands_separators) {
				261	switch (format->type) {
				262	case 'd':
				263	case 'e':
				264	case 'f':
				265	case 'g':
				266	case 'E':
				267	case 'G':
				268	case '%':
				269	case 'F':
				270	case '\0':
				271	/* These are allowed. See PEP 378.*/
				272	break;
				273	default:
				274	invalid_comma_type(format->type);
				275	return 0;
				276	}
				277	}
				278
				279	if (format->fill_char > 127 \|\| format->align > 127 \|\|
				280	format->sign > 127) {
				281	PyErr_SetString(PyExc_ValueError, "fill character too large");
				282	return 0;
				283	}
				284
				285	return 1;
				286	}
				287
				288	/* Calculate the padding needed. */
				289	static void
				290	calc_padding(Py_ssize_t nchars, Py_ssize_t width, Py_UCS4 align,
				291	Py_ssize_t n_lpadding, Py_ssize_t n_rpadding,
				292	Py_ssize_t *n_total)
				293	{
				294	if (width >= 0) {
				295	if (nchars > width)
				296	*n_total = nchars;
				297	else
				298	*n_total = width;
				299	}
				300	else {
				301	/* not specified, use all of the chars and no more */
				302	*n_total = nchars;
				303	}
				304
				305	/* Figure out how much leading space we need, based on the
				306	aligning */
				307	if (align == '>')
				308	n_lpadding = n_total - nchars;
				309	else if (align == '^')
				310	n_lpadding = (n_total - nchars) / 2;
				311	else if (align == '<' \|\| align == '=')
				312	*n_lpadding = 0;
				313	else {
				314	/* We should never have an unspecified alignment. */
				315	*n_lpadding = 0;
				316	assert(0);
				317	}
				318
				319	n_rpadding = n_total - nchars - *n_lpadding;
				320	}
				321
				322	static void
				323	unicode_fill(PyObject *str, Py_ssize_t start, Py_ssize_t end, Py_UCS4 ch)
				324	{
				325	int kind = PyUnicode_KIND(str);
				326	void *data = PyUnicode_DATA(str);
				327	while (start < end)
				328	PyUnicode_WRITE(kind, data, start++, ch);
				329	}
				330
				331	/* Do the padding, and return a pointer to where the caller-supplied
				332	content goes. */
				333	static Py_ssize_t
				334	fill_padding(PyObject *s, Py_ssize_t start, Py_ssize_t nchars,
				335	Py_UCS4 fill_char, Py_ssize_t n_lpadding,
				336	Py_ssize_t n_rpadding)
				337	{
				338	/* Pad on left. */
				339	if (n_lpadding)
				340	unicode_fill(s, start, start + n_lpadding, fill_char);
				341
				342	/* Pad on right. */
				343	if (n_rpadding)
Victor Stinner	afbaa20	2011-09-28 21:50:16 +0200	[diff] [blame]	344	unicode_fill(s, start + nchars + n_lpadding,
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	345	start + nchars + n_lpadding + n_rpadding, fill_char);
				346
				347	/* Pointer to the user content. */
				348	return start + n_lpadding;
				349	}
				350
				351	/************************************************************************/
				352	/********* common routines for numeric formatting *******************/
				353	/************************************************************************/
				354
				355	/* Locale type codes. */
				356	#define LT_CURRENT_LOCALE 0
				357	#define LT_DEFAULT_LOCALE 1
				358	#define LT_NO_LOCALE 2
				359
				360	/* Locale info needed for formatting integers and the part of floats
				361	before and including the decimal. Note that locales only support
				362	8-bit chars, not unicode. */
				363	typedef struct {
				364	char *decimal_point;
				365	char *thousands_sep;
				366	char *grouping;
				367	} LocaleInfo;
				368
				369	/* describes the layout for an integer, see the comment in
				370	calc_number_widths() for details */
				371	typedef struct {
				372	Py_ssize_t n_lpadding;
				373	Py_ssize_t n_prefix;
				374	Py_ssize_t n_spadding;
				375	Py_ssize_t n_rpadding;
				376	char sign;
				377	Py_ssize_t n_sign; /* number of digits needed for sign (0/1) */
				378	Py_ssize_t n_grouped_digits; /* Space taken up by the digits, including
				379	any grouping chars. */
				380	Py_ssize_t n_decimal; /* 0 if only an integer */
				381	Py_ssize_t n_remainder; /* Digits in decimal and/or exponent part,
				382	excluding the decimal itself, if
				383	present. */
				384
				385	/* These 2 are not the widths of fields, but are needed by
				386	STRINGLIB_GROUPING. */
				387	Py_ssize_t n_digits; /* The number of digits before a decimal
				388	or exponent. */
				389	Py_ssize_t n_min_width; /* The min_width we used when we computed
				390	the n_grouped_digits width. */
				391	} NumberFieldWidths;
				392
				393
				394	/* Given a number of the form:
				395	digits[remainder]
				396	where ptr points to the start and end points to the end, find where
				397	the integer part ends. This could be a decimal, an exponent, both,
				398	or neither.
				399	If a decimal point is present, set *has_decimal and increment
				400	remainder beyond it.
				401	Results are undefined (but shouldn't crash) for improperly
				402	formatted strings.
				403	*/
				404	static void
				405	parse_number(PyObject *s, Py_ssize_t pos, Py_ssize_t end,
				406	Py_ssize_t n_remainder, int has_decimal)
				407	{
				408	Py_ssize_t remainder;
				409
				410	while (pos<end && isdigit(PyUnicode_READ_CHAR(s, pos)))
				411	++pos;
				412	remainder = pos;
				413
				414	/* Does remainder start with a decimal point? */
				415	*has_decimal = pos<end && PyUnicode_READ_CHAR(s, remainder) == '.';
				416
				417	/* Skip the decimal point. */
				418	if (*has_decimal)
				419	remainder++;
				420
				421	*n_remainder = end - remainder;
				422	}
				423
				424	/* not all fields of format are used. for example, precision is
				425	unused. should this take discrete params in order to be more clear
				426	about what it does? or is passing a single format parameter easier
				427	and more efficient enough to justify a little obfuscation? */
				428	static Py_ssize_t
				429	calc_number_widths(NumberFieldWidths *spec, Py_ssize_t n_prefix,
				430	Py_UCS4 sign_char, PyObject *number, Py_ssize_t n_start,
				431	Py_ssize_t n_end, Py_ssize_t n_remainder,
				432	int has_decimal, const LocaleInfo *locale,
				433	const InternalFormatSpec *format)
				434	{
				435	Py_ssize_t n_non_digit_non_padding;
				436	Py_ssize_t n_padding;
				437
				438	spec->n_digits = n_end - n_start - n_remainder - (has_decimal?1:0);
				439	spec->n_lpadding = 0;
				440	spec->n_prefix = n_prefix;
				441	spec->n_decimal = has_decimal ? strlen(locale->decimal_point) : 0;
				442	spec->n_remainder = n_remainder;
				443	spec->n_spadding = 0;
				444	spec->n_rpadding = 0;
				445	spec->sign = '\0';
				446	spec->n_sign = 0;
				447
				448	/* the output will look like:
				449	\| \|
				450	\| <lpadding> <sign> <prefix> <spadding> <grouped_digits> <decimal> <remainder> <rpadding> \|
				451	\| \|
				452
				453	sign is computed from format->sign and the actual
				454	sign of the number
				455
				456	prefix is given (it's for the '0x' prefix)
				457
				458	digits is already known
				459
				460	the total width is either given, or computed from the
				461	actual digits
				462
				463	only one of lpadding, spadding, and rpadding can be non-zero,
				464	and it's calculated from the width and other fields
				465	*/
				466
				467	/* compute the various parts we're going to write */
				468	switch (format->sign) {
				469	case '+':
				470	/* always put a + or - */
				471	spec->n_sign = 1;
				472	spec->sign = (sign_char == '-' ? '-' : '+');
				473	break;
				474	case ' ':
				475	spec->n_sign = 1;
				476	spec->sign = (sign_char == '-' ? '-' : ' ');
				477	break;
				478	default:
				479	/* Not specified, or the default (-) */
				480	if (sign_char == '-') {
				481	spec->n_sign = 1;
				482	spec->sign = '-';
				483	}
				484	}
				485
				486	/* The number of chars used for non-digits and non-padding. */
				487	n_non_digit_non_padding = spec->n_sign + spec->n_prefix + spec->n_decimal +
				488	spec->n_remainder;
				489
				490	/* min_width can go negative, that's okay. format->width == -1 means
				491	we don't care. */
				492	if (format->fill_char == '0' && format->align == '=')
				493	spec->n_min_width = format->width - n_non_digit_non_padding;
				494	else
				495	spec->n_min_width = 0;
				496
				497	if (spec->n_digits == 0)
				498	/* This case only occurs when using 'c' formatting, we need
				499	to special case it because the grouping code always wants
				500	to have at least one character. */
				501	spec->n_grouped_digits = 0;
				502	else
				503	spec->n_grouped_digits = _PyUnicode_InsertThousandsGrouping(
Victor Stinner	c3cec78	2011-10-05 21:24:08 +0200	[diff] [blame]	504	NULL, PyUnicode_1BYTE_KIND, NULL, 0, NULL,
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	505	spec->n_digits, spec->n_min_width,
				506	locale->grouping, locale->thousands_sep);
				507
				508	/* Given the desired width and the total of digit and non-digit
				509	space we consume, see if we need any padding. format->width can
				510	be negative (meaning no padding), but this code still works in
				511	that case. */
				512	n_padding = format->width -
				513	(n_non_digit_non_padding + spec->n_grouped_digits);
				514	if (n_padding > 0) {
				515	/* Some padding is needed. Determine if it's left, space, or right. */
				516	switch (format->align) {
				517	case '<':
				518	spec->n_rpadding = n_padding;
				519	break;
				520	case '^':
				521	spec->n_lpadding = n_padding / 2;
				522	spec->n_rpadding = n_padding - spec->n_lpadding;
				523	break;
				524	case '=':
				525	spec->n_spadding = n_padding;
				526	break;
				527	case '>':
				528	spec->n_lpadding = n_padding;
				529	break;
				530	default:
				531	/* Shouldn't get here, but treat it as '>' */
				532	spec->n_lpadding = n_padding;
				533	assert(0);
				534	break;
				535	}
				536	}
				537	return spec->n_lpadding + spec->n_sign + spec->n_prefix +
				538	spec->n_spadding + spec->n_grouped_digits + spec->n_decimal +
				539	spec->n_remainder + spec->n_rpadding;
				540	}
				541
				542	/* Fill in the digit parts of a numbers's string representation,
				543	as determined in calc_number_widths().
Victor Stinner	afbaa20	2011-09-28 21:50:16 +0200	[diff] [blame]	544	Return -1 on error, or 0 on success. */
				545	static int
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	546	fill_number(PyObject out, Py_ssize_t pos, const NumberFieldWidths spec,
				547	PyObject *digits, Py_ssize_t d_start, Py_ssize_t d_end,
Victor Stinner	afbaa20	2011-09-28 21:50:16 +0200	[diff] [blame]	548	PyObject *prefix, Py_ssize_t p_start,
				549	Py_UCS4 fill_char,
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	550	LocaleInfo *locale, int toupper)
				551	{
				552	/* Used to keep track of digits, decimal, and remainder. */
				553	Py_ssize_t d_pos = d_start;
				554	unsigned int kind = PyUnicode_KIND(out);
				555	void *data = PyUnicode_DATA(out);
				556
				557	#ifndef NDEBUG
				558	Py_ssize_t r;
				559	#endif
				560
				561	if (spec->n_lpadding) {
				562	unicode_fill(out, pos, pos + spec->n_lpadding, fill_char);
				563	pos += spec->n_lpadding;
				564	}
				565	if (spec->n_sign == 1) {
				566	PyUnicode_WRITE(kind, data, pos++, spec->sign);
				567	}
				568	if (spec->n_prefix) {
Victor Stinner	fd85c3a	2011-09-28 21:53:49 +0200	[diff] [blame]	569	if (PyUnicode_CopyCharacters(out, pos,
				570	prefix, p_start,
				571	spec->n_prefix) < 0)
				572	return -1;
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	573	if (toupper) {
				574	Py_ssize_t t;
				575	/* XXX if the upper-case prefix is wider than the target
				576	buffer, the caller should have allocated a wider string,
				577	but currently doesn't. */
				578	for (t = 0; t < spec->n_prefix; ++t)
				579	PyUnicode_WRITE(kind, data, pos + t,
				580	Py_UNICODE_TOUPPER(
				581	PyUnicode_READ(kind, data, pos + t)));
				582	}
				583	pos += spec->n_prefix;
				584	}
				585	if (spec->n_spadding) {
				586	unicode_fill(out, pos, pos + spec->n_spadding, fill_char);
				587	pos += spec->n_spadding;
				588	}
				589
				590	/* Only for type 'c' special case, it has no digits. */
				591	if (spec->n_digits != 0) {
				592	/* Fill the digits with InsertThousandsGrouping. */
Victor Stinner	dba2dee	2011-09-28 21:50:42 +0200	[diff] [blame]	593	char *pdigits;
				594	if (PyUnicode_READY(digits))
				595	return -1;
				596	pdigits = PyUnicode_DATA(digits);
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	597	if (PyUnicode_KIND(digits) < kind) {
				598	pdigits = _PyUnicode_AsKind(digits, kind);
Victor Stinner	afbaa20	2011-09-28 21:50:16 +0200	[diff] [blame]	599	if (pdigits == NULL)
				600	return -1;
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	601	}
				602	#ifndef NDEBUG
				603	r =
				604	#endif
				605	_PyUnicode_InsertThousandsGrouping(
Victor Stinner	c3cec78	2011-10-05 21:24:08 +0200	[diff] [blame]	606	out, kind,
Martin v. Löwis	c47adb0	2011-10-07 20:55:35 +0200	[diff] [blame^]	607	(char)data + kind pos,
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	608	spec->n_grouped_digits,
Martin v. Löwis	c47adb0	2011-10-07 20:55:35 +0200	[diff] [blame^]	609	pdigits + kind * d_pos,
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	610	spec->n_digits, spec->n_min_width,
				611	locale->grouping, locale->thousands_sep);
				612	#ifndef NDEBUG
				613	assert(r == spec->n_grouped_digits);
				614	#endif
				615	if (PyUnicode_KIND(digits) < kind)
				616	PyMem_Free(pdigits);
				617	d_pos += spec->n_digits;
				618	}
				619	if (toupper) {
				620	Py_ssize_t t;
				621	for (t = 0; t < spec->n_grouped_digits; ++t)
				622	PyUnicode_WRITE(kind, data, pos + t,
				623	Py_UNICODE_TOUPPER(
				624	PyUnicode_READ(kind, data, pos + t)));
				625	}
				626	pos += spec->n_grouped_digits;
				627
				628	if (spec->n_decimal) {
				629	Py_ssize_t t;
				630	for (t = 0; t < spec->n_decimal; ++t)
				631	PyUnicode_WRITE(kind, data, pos + t,
				632	locale->decimal_point[t]);
				633	pos += spec->n_decimal;
				634	d_pos += 1;
				635	}
				636
				637	if (spec->n_remainder) {
Victor Stinner	fd85c3a	2011-09-28 21:53:49 +0200	[diff] [blame]	638	if (PyUnicode_CopyCharacters(out, pos, digits, d_pos, spec->n_remainder) < 0)
				639	return -1;
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	640	pos += spec->n_remainder;
				641	d_pos += spec->n_remainder;
				642	}
				643
				644	if (spec->n_rpadding) {
				645	unicode_fill(out, pos, pos + spec->n_rpadding, fill_char);
				646	pos += spec->n_rpadding;
				647	}
Victor Stinner	afbaa20	2011-09-28 21:50:16 +0200	[diff] [blame]	648	return 0;
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	649	}
				650
				651	static char no_grouping[1] = {CHAR_MAX};
				652
				653	/* Find the decimal point character(s?), thousands_separator(s?), and
				654	grouping description, either for the current locale if type is
				655	LT_CURRENT_LOCALE, a hard-coded locale if LT_DEFAULT_LOCALE, or
				656	none if LT_NO_LOCALE. */
				657	static void
				658	get_locale_info(int type, LocaleInfo *locale_info)
				659	{
				660	switch (type) {
				661	case LT_CURRENT_LOCALE: {
				662	struct lconv *locale_data = localeconv();
				663	locale_info->decimal_point = locale_data->decimal_point;
				664	locale_info->thousands_sep = locale_data->thousands_sep;
				665	locale_info->grouping = locale_data->grouping;
				666	break;
				667	}
				668	case LT_DEFAULT_LOCALE:
				669	locale_info->decimal_point = ".";
				670	locale_info->thousands_sep = ",";
				671	locale_info->grouping = "\3"; /* Group every 3 characters. The
				672	(implicit) trailing 0 means repeat
				673	infinitely. */
				674	break;
				675	case LT_NO_LOCALE:
				676	locale_info->decimal_point = ".";
				677	locale_info->thousands_sep = "";
				678	locale_info->grouping = no_grouping;
				679	break;
				680	default:
				681	assert(0);
				682	}
				683	}
				684
				685	/************************************************************************/
				686	/********* string formatting ****************************************/
				687	/************************************************************************/
				688
				689	static PyObject *
				690	format_string_internal(PyObject value, const InternalFormatSpec format)
				691	{
				692	Py_ssize_t lpad;
				693	Py_ssize_t rpad;
				694	Py_ssize_t total;
				695	Py_ssize_t pos;
				696	Py_ssize_t len = PyUnicode_GET_SIZE(value);
				697	PyObject *result = NULL;
				698	int maxchar = 127;
				699
				700	/* sign is not allowed on strings */
				701	if (format->sign != '\0') {
				702	PyErr_SetString(PyExc_ValueError,
				703	"Sign not allowed in string format specifier");
				704	goto done;
				705	}
				706
				707	/* alternate is not allowed on strings */
				708	if (format->alternate) {
				709	PyErr_SetString(PyExc_ValueError,
				710	"Alternate form (#) not allowed in string format "
				711	"specifier");
				712	goto done;
				713	}
				714
				715	/* '=' alignment not allowed on strings */
				716	if (format->align == '=') {
				717	PyErr_SetString(PyExc_ValueError,
				718	"'=' alignment not allowed "
				719	"in string format specifier");
				720	goto done;
				721	}
				722
				723	/* if precision is specified, output no more that format.precision
				724	characters */
				725	if (format->precision >= 0 && len >= format->precision) {
				726	len = format->precision;
				727	}
				728
				729	calc_padding(len, format->width, format->align, &lpad, &rpad, &total);
				730
				731	/* allocate the resulting string */
				732	result = PyUnicode_New(total, maxchar);
				733	if (result == NULL)
				734	goto done;
				735
				736	/* Write into that space. First the padding. */
				737	pos = fill_padding(result, 0, len,
				738	format->fill_char=='\0'?' ':format->fill_char,
				739	lpad, rpad);
				740
				741	/* Then the source string. */
Victor Stinner	fd85c3a	2011-09-28 21:53:49 +0200	[diff] [blame]	742	if (PyUnicode_CopyCharacters(result, pos, value, 0, len) < 0)
				743	Py_CLEAR(result);
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	744
				745	done:
				746	return result;
				747	}
				748
				749
				750	/************************************************************************/
				751	/********* long formatting ******************************************/
				752	/************************************************************************/
				753
				754	typedef PyObject*
				755	(IntOrLongToString)(PyObject value, int base);
				756
				757	static PyObject *
				758	format_int_or_long_internal(PyObject value, const InternalFormatSpec format,
				759	IntOrLongToString tostring)
				760	{
				761	PyObject *result = NULL;
				762	int maxchar = 127;
				763	PyObject *tmp = NULL;
				764	Py_ssize_t inumeric_chars;
				765	Py_UCS4 sign_char = '\0';
				766	Py_ssize_t n_digits; /* count of digits need from the computed
				767	string */
				768	Py_ssize_t n_remainder = 0; /* Used only for 'c' formatting, which
				769	produces non-digits */
				770	Py_ssize_t n_prefix = 0; /* Count of prefix chars, (e.g., '0x') */
				771	Py_ssize_t n_total;
				772	Py_ssize_t prefix;
				773	NumberFieldWidths spec;
				774	long x;
Victor Stinner	afbaa20	2011-09-28 21:50:16 +0200	[diff] [blame]	775	int err;
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	776
				777	/* Locale settings, either from the actual locale or
				778	from a hard-code pseudo-locale */
				779	LocaleInfo locale;
				780
				781	/* no precision allowed on integers */
				782	if (format->precision != -1) {
				783	PyErr_SetString(PyExc_ValueError,
				784	"Precision not allowed in integer format specifier");
				785	goto done;
				786	}
				787
				788	/* special case for character formatting */
				789	if (format->type == 'c') {
				790	/* error to specify a sign */
				791	if (format->sign != '\0') {
				792	PyErr_SetString(PyExc_ValueError,
				793	"Sign not allowed with integer"
				794	" format specifier 'c'");
				795	goto done;
				796	}
				797
				798	/* taken from unicodeobject.c formatchar() */
				799	/* Integer input truncated to a character */
				800	/* XXX: won't work for int */
				801	x = PyLong_AsLong(value);
				802	if (x == -1 && PyErr_Occurred())
				803	goto done;
				804	if (x < 0 \|\| x > 0x10ffff) {
				805	PyErr_SetString(PyExc_OverflowError,
				806	"%c arg not in range(0x110000) "
				807	"(wide Python build)");
				808	goto done;
				809	}
				810	tmp = PyUnicode_FromOrdinal(x);
				811	inumeric_chars = 0;
				812	n_digits = 1;
				813	if (x > maxchar)
				814	maxchar = x;
				815
				816	/* As a sort-of hack, we tell calc_number_widths that we only
				817	have "remainder" characters. calc_number_widths thinks
				818	these are characters that don't get formatted, only copied
				819	into the output string. We do this for 'c' formatting,
				820	because the characters are likely to be non-digits. */
				821	n_remainder = 1;
				822	}
				823	else {
				824	int base;
				825	int leading_chars_to_skip = 0; /* Number of characters added by
				826	PyNumber_ToBase that we want to
				827	skip over. */
				828
				829	/* Compute the base and how many characters will be added by
				830	PyNumber_ToBase */
				831	switch (format->type) {
				832	case 'b':
				833	base = 2;
				834	leading_chars_to_skip = 2; /* 0b */
				835	break;
				836	case 'o':
				837	base = 8;
				838	leading_chars_to_skip = 2; /* 0o */
				839	break;
				840	case 'x':
				841	case 'X':
				842	base = 16;
				843	leading_chars_to_skip = 2; /* 0x */
				844	break;
				845	default: /* shouldn't be needed, but stops a compiler warning */
				846	case 'd':
				847	case 'n':
				848	base = 10;
				849	break;
				850	}
				851
				852	/* The number of prefix chars is the same as the leading
				853	chars to skip */
				854	if (format->alternate)
				855	n_prefix = leading_chars_to_skip;
				856
				857	/* Do the hard part, converting to a string in a given base */
				858	tmp = tostring(value, base);
				859	if (tmp == NULL \|\| PyUnicode_READY(tmp) == -1)
				860	goto done;
				861
				862	inumeric_chars = 0;
				863	n_digits = PyUnicode_GET_LENGTH(tmp);
				864
				865	prefix = inumeric_chars;
				866
				867	/* Is a sign character present in the output? If so, remember it
				868	and skip it */
				869	if (PyUnicode_READ_CHAR(tmp, inumeric_chars) == '-') {
				870	sign_char = '-';
				871	++prefix;
				872	++leading_chars_to_skip;
				873	}
				874
				875	/* Skip over the leading chars (0x, 0b, etc.) */
				876	n_digits -= leading_chars_to_skip;
				877	inumeric_chars += leading_chars_to_skip;
				878	}
				879
				880	/* Determine the grouping, separator, and decimal point, if any. */
				881	get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
				882	(format->thousands_separators ?
				883	LT_DEFAULT_LOCALE :
				884	LT_NO_LOCALE),
				885	&locale);
				886
				887	/* Calculate how much memory we'll need. */
				888	n_total = calc_number_widths(&spec, n_prefix, sign_char, tmp, inumeric_chars,
				889	inumeric_chars + n_digits, n_remainder, 0, &locale, format);
				890
				891	/* Allocate the memory. */
				892	result = PyUnicode_New(n_total, maxchar);
				893	if (!result)
				894	goto done;
				895
				896	/* Populate the memory. */
Victor Stinner	afbaa20	2011-09-28 21:50:16 +0200	[diff] [blame]	897	err = fill_number(result, 0, &spec,
				898	tmp, inumeric_chars, inumeric_chars + n_digits,
				899	tmp, prefix,
				900	format->fill_char == '\0' ? ' ' : format->fill_char,
				901	&locale, format->type == 'X');
				902	if (err)
				903	Py_CLEAR(result);
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	904
				905	done:
				906	Py_XDECREF(tmp);
				907	return result;
				908	}
				909
				910	/************************************************************************/
				911	/********* float formatting *****************************************/
				912	/************************************************************************/
				913
				914	static PyObject*
				915	strtounicode(char *charbuffer, Py_ssize_t len)
				916	{
				917	return PyUnicode_FromKindAndData(PyUnicode_1BYTE_KIND, charbuffer, len);
				918	}
				919
				920	/* much of this is taken from unicodeobject.c */
				921	static PyObject *
				922	format_float_internal(PyObject *value,
				923	const InternalFormatSpec *format)
				924	{
				925	char buf = NULL; / buffer returned from PyOS_double_to_string */
				926	Py_ssize_t n_digits;
				927	Py_ssize_t n_remainder;
				928	Py_ssize_t n_total;
				929	int has_decimal;
				930	double val;
				931	Py_ssize_t precision = format->precision;
				932	Py_ssize_t default_precision = 6;
				933	Py_UCS4 type = format->type;
				934	int add_pct = 0;
				935	Py_ssize_t index;
				936	NumberFieldWidths spec;
				937	int flags = 0;
				938	PyObject *result = NULL;
				939	int maxchar = 127;
				940	Py_UCS4 sign_char = '\0';
				941	int float_type; /* Used to see if we have a nan, inf, or regular float. */
				942	PyObject *unicode_tmp = NULL;
Victor Stinner	afbaa20	2011-09-28 21:50:16 +0200	[diff] [blame]	943	int err;
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	944
				945	/* Locale settings, either from the actual locale or
				946	from a hard-code pseudo-locale */
				947	LocaleInfo locale;
				948
				949	if (format->alternate)
				950	flags \|= Py_DTSF_ALT;
				951
				952	if (type == '\0') {
				953	/* Omitted type specifier. Behaves in the same way as repr(x)
				954	and str(x) if no precision is given, else like 'g', but with
				955	at least one digit after the decimal point. */
				956	flags \|= Py_DTSF_ADD_DOT_0;
				957	type = 'r';
				958	default_precision = 0;
				959	}
				960
				961	if (type == 'n')
				962	/* 'n' is the same as 'g', except for the locale used to
				963	format the result. We take care of that later. */
				964	type = 'g';
				965
				966	val = PyFloat_AsDouble(value);
				967	if (val == -1.0 && PyErr_Occurred())
				968	goto done;
				969
				970	if (type == '%') {
				971	type = 'f';
				972	val *= 100;
				973	add_pct = 1;
				974	}
				975
				976	if (precision < 0)
				977	precision = default_precision;
				978	else if (type == 'r')
				979	type = 'g';
				980
				981	/* Cast "type", because if we're in unicode we need to pass a
				982	8-bit char. This is safe, because we've restricted what "type"
				983	can be. */
				984	buf = PyOS_double_to_string(val, (char)type, precision, flags,
				985	&float_type);
				986	if (buf == NULL)
				987	goto done;
				988	n_digits = strlen(buf);
				989
				990	if (add_pct) {
				991	/* We know that buf has a trailing zero (since we just called
				992	strlen() on it), and we don't use that fact any more. So we
				993	can just write over the trailing zero. */
				994	buf[n_digits] = '%';
				995	n_digits += 1;
				996	}
				997
				998	/* Since there is no unicode version of PyOS_double_to_string,
				999	just use the 8 bit version and then convert to unicode. */
				1000	unicode_tmp = strtounicode(buf, n_digits);
				1001	if (unicode_tmp == NULL)
				1002	goto done;
				1003	index = 0;
				1004
				1005	/* Is a sign character present in the output? If so, remember it
				1006	and skip it */
				1007	if (PyUnicode_READ_CHAR(unicode_tmp, index) == '-') {
				1008	sign_char = '-';
				1009	++index;
				1010	--n_digits;
				1011	}
				1012
				1013	/* Determine if we have any "remainder" (after the digits, might include
				1014	decimal or exponent or both (or neither)) */
				1015	parse_number(unicode_tmp, index, index + n_digits, &n_remainder, &has_decimal);
				1016
				1017	/* Determine the grouping, separator, and decimal point, if any. */
				1018	get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
				1019	(format->thousands_separators ?
				1020	LT_DEFAULT_LOCALE :
				1021	LT_NO_LOCALE),
				1022	&locale);
				1023
				1024	/* Calculate how much memory we'll need. */
Victor Stinner	afbaa20	2011-09-28 21:50:16 +0200	[diff] [blame]	1025	n_total = calc_number_widths(&spec, 0, sign_char, unicode_tmp, index,
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	1026	index + n_digits, n_remainder, has_decimal,
				1027	&locale, format);
				1028
				1029	/* Allocate the memory. */
				1030	result = PyUnicode_New(n_total, maxchar);
				1031	if (result == NULL)
				1032	goto done;
				1033
				1034	/* Populate the memory. */
Victor Stinner	afbaa20	2011-09-28 21:50:16 +0200	[diff] [blame]	1035	err = fill_number(result, 0, &spec,
				1036	unicode_tmp, index, index + n_digits,
				1037	NULL, 0,
				1038	format->fill_char == '\0' ? ' ' : format->fill_char,
				1039	&locale, 0);
				1040	if (err)
				1041	Py_CLEAR(result);
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	1042
				1043	done:
				1044	PyMem_Free(buf);
				1045	Py_DECREF(unicode_tmp);
				1046	return result;
				1047	}
				1048
				1049	/************************************************************************/
				1050	/********* complex formatting ***************************************/
				1051	/************************************************************************/
				1052
				1053	static PyObject *
				1054	format_complex_internal(PyObject *value,
				1055	const InternalFormatSpec *format)
				1056	{
				1057	double re;
				1058	double im;
				1059	char re_buf = NULL; / buffer returned from PyOS_double_to_string */
				1060	char im_buf = NULL; / buffer returned from PyOS_double_to_string */
				1061
				1062	InternalFormatSpec tmp_format = *format;
				1063	Py_ssize_t n_re_digits;
				1064	Py_ssize_t n_im_digits;
				1065	Py_ssize_t n_re_remainder;
				1066	Py_ssize_t n_im_remainder;
				1067	Py_ssize_t n_re_total;
				1068	Py_ssize_t n_im_total;
				1069	int re_has_decimal;
				1070	int im_has_decimal;
				1071	Py_ssize_t precision = format->precision;
				1072	Py_ssize_t default_precision = 6;
				1073	Py_UCS4 type = format->type;
				1074	Py_ssize_t i_re;
				1075	Py_ssize_t i_im;
				1076	NumberFieldWidths re_spec;
				1077	NumberFieldWidths im_spec;
				1078	int flags = 0;
				1079	PyObject *result = NULL;
				1080	int maxchar = 127;
				1081	int rkind;
				1082	void *rdata;
				1083	Py_ssize_t index;
				1084	Py_UCS4 re_sign_char = '\0';
				1085	Py_UCS4 im_sign_char = '\0';
				1086	int re_float_type; /* Used to see if we have a nan, inf, or regular float. */
				1087	int im_float_type;
				1088	int add_parens = 0;
				1089	int skip_re = 0;
				1090	Py_ssize_t lpad;
				1091	Py_ssize_t rpad;
				1092	Py_ssize_t total;
				1093	PyObject *re_unicode_tmp = NULL;
				1094	PyObject *im_unicode_tmp = NULL;
Victor Stinner	afbaa20	2011-09-28 21:50:16 +0200	[diff] [blame]	1095	int err;
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	1096
				1097	/* Locale settings, either from the actual locale or
				1098	from a hard-code pseudo-locale */
				1099	LocaleInfo locale;
				1100
				1101	/* Zero padding is not allowed. */
				1102	if (format->fill_char == '0') {
				1103	PyErr_SetString(PyExc_ValueError,
				1104	"Zero padding is not allowed in complex format "
				1105	"specifier");
				1106	goto done;
				1107	}
				1108
				1109	/* Neither is '=' alignment . */
				1110	if (format->align == '=') {
				1111	PyErr_SetString(PyExc_ValueError,
				1112	"'=' alignment flag is not allowed in complex format "
				1113	"specifier");
				1114	goto done;
				1115	}
				1116
				1117	re = PyComplex_RealAsDouble(value);
				1118	if (re == -1.0 && PyErr_Occurred())
				1119	goto done;
				1120	im = PyComplex_ImagAsDouble(value);
				1121	if (im == -1.0 && PyErr_Occurred())
				1122	goto done;
				1123
				1124	if (format->alternate)
				1125	flags \|= Py_DTSF_ALT;
				1126
				1127	if (type == '\0') {
				1128	/* Omitted type specifier. Should be like str(self). */
				1129	type = 'r';
				1130	default_precision = 0;
				1131	if (re == 0.0 && copysign(1.0, re) == 1.0)
				1132	skip_re = 1;
				1133	else
				1134	add_parens = 1;
				1135	}
				1136
				1137	if (type == 'n')
				1138	/* 'n' is the same as 'g', except for the locale used to
				1139	format the result. We take care of that later. */
				1140	type = 'g';
				1141
				1142	if (precision < 0)
				1143	precision = default_precision;
				1144	else if (type == 'r')
				1145	type = 'g';
				1146
				1147	/* Cast "type", because if we're in unicode we need to pass a
				1148	8-bit char. This is safe, because we've restricted what "type"
				1149	can be. */
				1150	re_buf = PyOS_double_to_string(re, (char)type, precision, flags,
				1151	&re_float_type);
				1152	if (re_buf == NULL)
				1153	goto done;
				1154	im_buf = PyOS_double_to_string(im, (char)type, precision, flags,
				1155	&im_float_type);
				1156	if (im_buf == NULL)
				1157	goto done;
				1158
				1159	n_re_digits = strlen(re_buf);
				1160	n_im_digits = strlen(im_buf);
				1161
				1162	/* Since there is no unicode version of PyOS_double_to_string,
				1163	just use the 8 bit version and then convert to unicode. */
				1164	re_unicode_tmp = strtounicode(re_buf, n_re_digits);
				1165	if (re_unicode_tmp == NULL)
				1166	goto done;
				1167	i_re = 0;
				1168
				1169	im_unicode_tmp = strtounicode(im_buf, n_im_digits);
				1170	if (im_unicode_tmp == NULL)
				1171	goto done;
				1172	i_im = 0;
				1173
				1174	/* Is a sign character present in the output? If so, remember it
				1175	and skip it */
				1176	if (PyUnicode_READ_CHAR(re_unicode_tmp, i_re) == '-') {
				1177	re_sign_char = '-';
				1178	++i_re;
				1179	--n_re_digits;
				1180	}
				1181	if (PyUnicode_READ_CHAR(im_unicode_tmp, i_im) == '-') {
				1182	im_sign_char = '-';
				1183	++i_im;
				1184	--n_im_digits;
				1185	}
				1186
				1187	/* Determine if we have any "remainder" (after the digits, might include
				1188	decimal or exponent or both (or neither)) */
Victor Stinner	afbaa20	2011-09-28 21:50:16 +0200	[diff] [blame]	1189	parse_number(re_unicode_tmp, i_re, i_re + n_re_digits,
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	1190	&n_re_remainder, &re_has_decimal);
Victor Stinner	afbaa20	2011-09-28 21:50:16 +0200	[diff] [blame]	1191	parse_number(im_unicode_tmp, i_im, i_im + n_im_digits,
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	1192	&n_im_remainder, &im_has_decimal);
				1193
				1194	/* Determine the grouping, separator, and decimal point, if any. */
				1195	get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
				1196	(format->thousands_separators ?
				1197	LT_DEFAULT_LOCALE :
				1198	LT_NO_LOCALE),
				1199	&locale);
				1200
				1201	/* Turn off any padding. We'll do it later after we've composed
				1202	the numbers without padding. */
				1203	tmp_format.fill_char = '\0';
				1204	tmp_format.align = '<';
				1205	tmp_format.width = -1;
				1206
				1207	/* Calculate how much memory we'll need. */
				1208	n_re_total = calc_number_widths(&re_spec, 0, re_sign_char, re_unicode_tmp,
				1209	i_re, i_re + n_re_digits, n_re_remainder,
				1210	re_has_decimal, &locale, &tmp_format);
				1211
				1212	/* Same formatting, but always include a sign, unless the real part is
				1213	* going to be omitted, in which case we use whatever sign convention was
				1214	* requested by the original format. */
				1215	if (!skip_re)
				1216	tmp_format.sign = '+';
				1217	n_im_total = calc_number_widths(&im_spec, 0, im_sign_char, im_unicode_tmp,
				1218	i_im, i_im + n_im_digits, n_im_remainder,
				1219	im_has_decimal, &locale, &tmp_format);
				1220
				1221	if (skip_re)
				1222	n_re_total = 0;
				1223
				1224	/* Add 1 for the 'j', and optionally 2 for parens. */
				1225	calc_padding(n_re_total + n_im_total + 1 + add_parens * 2,
				1226	format->width, format->align, &lpad, &rpad, &total);
				1227
				1228	result = PyUnicode_New(total, maxchar);
				1229	if (result == NULL)
				1230	goto done;
				1231	rkind = PyUnicode_KIND(result);
				1232	rdata = PyUnicode_DATA(result);
				1233
				1234	/* Populate the memory. First, the padding. */
				1235	index = fill_padding(result, 0,
				1236	n_re_total + n_im_total + 1 + add_parens * 2,
				1237	format->fill_char=='\0' ? ' ' : format->fill_char,
				1238	lpad, rpad);
				1239
				1240	if (add_parens)
				1241	PyUnicode_WRITE(rkind, rdata, index++, '(');
				1242
				1243	if (!skip_re) {
Victor Stinner	afbaa20	2011-09-28 21:50:16 +0200	[diff] [blame]	1244	err = fill_number(result, index, &re_spec,
				1245	re_unicode_tmp, i_re, i_re + n_re_digits,
				1246	NULL, 0,
				1247	0,
				1248	&locale, 0);
				1249	if (err) {
				1250	Py_CLEAR(result);
				1251	goto done;
				1252	}
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	1253	index += n_re_total;
				1254	}
Victor Stinner	afbaa20	2011-09-28 21:50:16 +0200	[diff] [blame]	1255	err = fill_number(result, index, &im_spec,
				1256	im_unicode_tmp, i_im, i_im + n_im_digits,
				1257	NULL, 0,
				1258	0,
				1259	&locale, 0);
				1260	if (err) {
				1261	Py_CLEAR(result);
				1262	goto done;
				1263	}
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	1264	index += n_im_total;
				1265	PyUnicode_WRITE(rkind, rdata, index++, 'j');
				1266
				1267	if (add_parens)
				1268	PyUnicode_WRITE(rkind, rdata, index++, ')');
				1269
				1270	done:
				1271	PyMem_Free(re_buf);
				1272	PyMem_Free(im_buf);
				1273	Py_XDECREF(re_unicode_tmp);
				1274	Py_XDECREF(im_unicode_tmp);
				1275	return result;
				1276	}
				1277
				1278	/************************************************************************/
				1279	/********* built in formatters **************************************/
				1280	/************************************************************************/
				1281	PyObject *
				1282	_PyUnicode_FormatAdvanced(PyObject *obj,
				1283	PyObject *format_spec,
				1284	Py_ssize_t start, Py_ssize_t end)
				1285	{
				1286	InternalFormatSpec format;
Victor Stinner	fb9ea8c	2011-10-06 01:45:57 +0200	[diff] [blame]	1287	PyObject *result;
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	1288
				1289	/* check for the special case of zero length format spec, make
				1290	it equivalent to str(obj) */
Victor Stinner	fb9ea8c	2011-10-06 01:45:57 +0200	[diff] [blame]	1291	if (start == end)
				1292	return PyObject_Str(obj);
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	1293
				1294	/* parse the format_spec */
				1295	if (!parse_internal_render_format_spec(format_spec, start, end,
				1296	&format, 's', '<'))
Victor Stinner	fb9ea8c	2011-10-06 01:45:57 +0200	[diff] [blame]	1297	return NULL;
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	1298
				1299	/* type conversion? */
				1300	switch (format.type) {
				1301	case 's':
				1302	/* no type conversion needed, already a string. do the formatting */
				1303	result = format_string_internal(obj, &format);
Victor Stinner	fb9ea8c	2011-10-06 01:45:57 +0200	[diff] [blame]	1304	if (result != NULL)
				1305	assert(_PyUnicode_CheckConsistency(result, 1));
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	1306	break;
				1307	default:
				1308	/* unknown */
				1309	unknown_presentation_type(format.type, obj->ob_type->tp_name);
Victor Stinner	fb9ea8c	2011-10-06 01:45:57 +0200	[diff] [blame]	1310	result = NULL;
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	1311	}
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	1312	return result;
				1313	}
				1314
				1315	static PyObject*
				1316	format_int_or_long(PyObject* obj, PyObject* format_spec,
				1317	Py_ssize_t start, Py_ssize_t end,
				1318	IntOrLongToString tostring)
				1319	{
				1320	PyObject *result = NULL;
				1321	PyObject *tmp = NULL;
				1322	InternalFormatSpec format;
				1323
				1324	/* check for the special case of zero length format spec, make
				1325	it equivalent to str(obj) */
				1326	if (start == end) {
				1327	result = PyObject_Str(obj);
				1328	goto done;
				1329	}
				1330
				1331	/* parse the format_spec */
				1332	if (!parse_internal_render_format_spec(format_spec, start, end,
				1333	&format, 'd', '>'))
				1334	goto done;
				1335
				1336	/* type conversion? */
				1337	switch (format.type) {
				1338	case 'b':
				1339	case 'c':
				1340	case 'd':
				1341	case 'o':
				1342	case 'x':
				1343	case 'X':
				1344	case 'n':
				1345	/* no type conversion needed, already an int (or long). do
				1346	the formatting */
				1347	result = format_int_or_long_internal(obj, &format, tostring);
				1348	break;
				1349
				1350	case 'e':
				1351	case 'E':
				1352	case 'f':
				1353	case 'F':
				1354	case 'g':
				1355	case 'G':
				1356	case '%':
				1357	/* convert to float */
				1358	tmp = PyNumber_Float(obj);
				1359	if (tmp == NULL)
				1360	goto done;
				1361	result = format_float_internal(tmp, &format);
				1362	break;
				1363
				1364	default:
				1365	/* unknown */
				1366	unknown_presentation_type(format.type, obj->ob_type->tp_name);
				1367	goto done;
				1368	}
				1369
				1370	done:
				1371	Py_XDECREF(tmp);
				1372	return result;
				1373	}
				1374
				1375	/* Need to define long_format as a function that will convert a long
				1376	to a string. In 3.0, _PyLong_Format has the correct signature. */
				1377	#define long_format _PyLong_Format
				1378
				1379	PyObject *
				1380	_PyLong_FormatAdvanced(PyObject *obj,
				1381	PyObject *format_spec,
				1382	Py_ssize_t start, Py_ssize_t end)
				1383	{
				1384	return format_int_or_long(obj, format_spec, start, end,
				1385	long_format);
				1386	}
				1387
				1388	PyObject *
				1389	_PyFloat_FormatAdvanced(PyObject *obj,
				1390	PyObject *format_spec,
				1391	Py_ssize_t start, Py_ssize_t end)
				1392	{
				1393	PyObject *result = NULL;
				1394	InternalFormatSpec format;
				1395
				1396	/* check for the special case of zero length format spec, make
				1397	it equivalent to str(obj) */
				1398	if (start == end) {
				1399	result = PyObject_Str(obj);
				1400	goto done;
				1401	}
				1402
				1403	/* parse the format_spec */
				1404	if (!parse_internal_render_format_spec(format_spec, start, end,
				1405	&format, '\0', '>'))
				1406	goto done;
				1407
				1408	/* type conversion? */
				1409	switch (format.type) {
				1410	case '\0': /* No format code: like 'g', but with at least one decimal. */
				1411	case 'e':
				1412	case 'E':
				1413	case 'f':
				1414	case 'F':
				1415	case 'g':
				1416	case 'G':
				1417	case 'n':
				1418	case '%':
				1419	/* no conversion, already a float. do the formatting */
				1420	result = format_float_internal(obj, &format);
				1421	break;
				1422
				1423	default:
				1424	/* unknown */
				1425	unknown_presentation_type(format.type, obj->ob_type->tp_name);
				1426	goto done;
				1427	}
				1428
				1429	done:
				1430	return result;
				1431	}
				1432
				1433	PyObject *
				1434	_PyComplex_FormatAdvanced(PyObject *obj,
				1435	PyObject *format_spec,
				1436	Py_ssize_t start, Py_ssize_t end)
				1437	{
				1438	PyObject *result = NULL;
				1439	InternalFormatSpec format;
				1440
				1441	/* check for the special case of zero length format spec, make
				1442	it equivalent to str(obj) */
				1443	if (start == end) {
				1444	result = PyObject_Str(obj);
				1445	goto done;
				1446	}
				1447
				1448	/* parse the format_spec */
				1449	if (!parse_internal_render_format_spec(format_spec, start, end,
				1450	&format, '\0', '>'))
				1451	goto done;
				1452
				1453	/* type conversion? */
				1454	switch (format.type) {
				1455	case '\0': /* No format code: like 'g', but with at least one decimal. */
				1456	case 'e':
				1457	case 'E':
				1458	case 'f':
				1459	case 'F':
				1460	case 'g':
				1461	case 'G':
				1462	case 'n':
				1463	/* no conversion, already a complex. do the formatting */
				1464	result = format_complex_internal(obj, &format);
				1465	break;
				1466
				1467	default:
				1468	/* unknown */
				1469	unknown_presentation_type(format.type, obj->ob_type->tp_name);
				1470	goto done;
				1471	}
				1472
				1473	done:
				1474	return result;
				1475	}