Blame - Python/formatter_unicode.c - platform/external/python/cpython3

blob: ed716a5b971747b78b0db243d21c4512c0196d49 [file] [log] [blame]

Eric Smith	8c66326	2007-08-25 02:26:07 +0000	[diff] [blame]	1	/* implements the unicode (as opposed to string) version of the
				2	built-in formatters for string, int, float. that is, the versions
				3	of int.__format__, etc., that take and return unicode objects */
				4
				5	#include "Python.h"
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	6	#include <locale.h>
				7
				8	/* Raises an exception about an unknown presentation type for this
				9	* type. */
				10
				11	static void
				12	unknown_presentation_type(Py_UCS4 presentation_type,
				13	const char* type_name)
				14	{
				15	/* %c might be out-of-range, hence the two cases. */
				16	if (presentation_type > 32 && presentation_type < 128)
				17	PyErr_Format(PyExc_ValueError,
				18	"Unknown format code '%c' "
				19	"for object of type '%.200s'",
				20	(char)presentation_type,
				21	type_name);
				22	else
				23	PyErr_Format(PyExc_ValueError,
				24	"Unknown format code '\\x%x' "
				25	"for object of type '%.200s'",
				26	(unsigned int)presentation_type,
				27	type_name);
				28	}
				29
				30	static void
				31	invalid_comma_type(Py_UCS4 presentation_type)
				32	{
				33	if (presentation_type > 32 && presentation_type < 128)
				34	PyErr_Format(PyExc_ValueError,
				35	"Cannot specify ',' with '%c'.",
				36	(char)presentation_type);
				37	else
				38	PyErr_Format(PyExc_ValueError,
				39	"Cannot specify ',' with '\\x%x'.",
				40	(unsigned int)presentation_type);
				41	}
				42
				43	/*
				44	get_integer consumes 0 or more decimal digit characters from an
				45	input string, updates *result with the corresponding positive
				46	integer, and returns the number of digits consumed.
				47
				48	returns -1 on error.
				49	*/
				50	static int
				51	get_integer(PyObject str, Py_ssize_t pos, Py_ssize_t end,
				52	Py_ssize_t *result)
				53	{
Mark Dickinson	47862d4	2011-12-01 15:27:04 +0000	[diff] [blame]	54	Py_ssize_t accumulator, digitval;
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	55	int numdigits;
				56	accumulator = numdigits = 0;
				57	for (;;(*pos)++, numdigits++) {
				58	if (*pos >= end)
				59	break;
				60	digitval = Py_UNICODE_TODECIMAL(PyUnicode_READ_CHAR(str, *pos));
				61	if (digitval < 0)
				62	break;
				63	/*
Mark Dickinson	47862d4	2011-12-01 15:27:04 +0000	[diff] [blame]	64	Detect possible overflow before it happens:
				65
				66	accumulator * 10 + digitval > PY_SSIZE_T_MAX if and only if
				67	accumulator > (PY_SSIZE_T_MAX - digitval) / 10.
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	68	*/
Mark Dickinson	47862d4	2011-12-01 15:27:04 +0000	[diff] [blame]	69	if (accumulator > (PY_SSIZE_T_MAX - digitval) / 10) {
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	70	PyErr_Format(PyExc_ValueError,
				71	"Too many decimal digits in format string");
				72	return -1;
				73	}
Mark Dickinson	47862d4	2011-12-01 15:27:04 +0000	[diff] [blame]	74	accumulator = accumulator * 10 + digitval;
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	75	}
				76	*result = accumulator;
				77	return numdigits;
				78	}
				79
				80	/************************************************************************/
				81	/********* standard format specifier parsing ************************/
				82	/************************************************************************/
				83
				84	/* returns true if this character is a specifier alignment token */
				85	Py_LOCAL_INLINE(int)
				86	is_alignment_token(Py_UCS4 c)
				87	{
				88	switch (c) {
				89	case '<': case '>': case '=': case '^':
				90	return 1;
				91	default:
				92	return 0;
				93	}
				94	}
				95
				96	/* returns true if this character is a sign element */
				97	Py_LOCAL_INLINE(int)
				98	is_sign_element(Py_UCS4 c)
				99	{
				100	switch (c) {
				101	case ' ': case '+': case '-':
				102	return 1;
				103	default:
				104	return 0;
				105	}
				106	}
Eric Smith	8c66326	2007-08-25 02:26:07 +0000	[diff] [blame]	107
Eric Smith	4a7d76d	2008-05-30 18:10:19 +0000	[diff] [blame]	108
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	109	typedef struct {
				110	Py_UCS4 fill_char;
				111	Py_UCS4 align;
				112	int alternate;
				113	Py_UCS4 sign;
				114	Py_ssize_t width;
				115	int thousands_separators;
				116	Py_ssize_t precision;
				117	Py_UCS4 type;
				118	} InternalFormatSpec;
Eric Smith	4a7d76d	2008-05-30 18:10:19 +0000	[diff] [blame]	119
#if 0
/* Occasionally useful for debugging.  Should normally be compiled out.

   Dumps every field of *format to stdout.  The Py_UCS4 fields are cast to
   int so that each argument matches its %d / %c conversion. */
static void
DEBUG_PRINT_FORMAT_SPEC(InternalFormatSpec *format)
{
    printf("internal format spec: fill_char %d\n", (int)format->fill_char);
    printf("internal format spec: align %d\n", (int)format->align);
    printf("internal format spec: alternate %d\n", format->alternate);
    printf("internal format spec: sign %d\n", (int)format->sign);
    printf("internal format spec: width %zd\n", format->width);
    printf("internal format spec: thousands_separators %d\n",
           format->thousands_separators);
    printf("internal format spec: precision %zd\n", format->precision);
    printf("internal format spec: type %c\n", (int)format->type);
    printf("\n");
}
#endif
				137
				138
				139	/*
				140	ptr points to the start of the format_spec, end points just past its end.
				141	fills in format with the parsed information.
				142	returns 1 on success, 0 on failure.
				143	if failure, sets the exception
				144	*/
				145	static int
				146	parse_internal_render_format_spec(PyObject *format_spec,
				147	Py_ssize_t start, Py_ssize_t end,
				148	InternalFormatSpec *format,
				149	char default_type,
				150	char default_align)
				151	{
				152	Py_ssize_t pos = start;
				153	/* end-pos is used throughout this code to specify the length of
				154	the input string */
				155	#define READ_spec(index) PyUnicode_READ_CHAR(format_spec, index)
				156
				157	Py_ssize_t consumed;
				158	int align_specified = 0;
				159
				160	format->fill_char = '\0';
				161	format->align = default_align;
				162	format->alternate = 0;
				163	format->sign = '\0';
				164	format->width = -1;
				165	format->thousands_separators = 0;
				166	format->precision = -1;
				167	format->type = default_type;
				168
				169	/* If the second char is an alignment token,
				170	then parse the fill char */
				171	if (end-pos >= 2 && is_alignment_token(READ_spec(pos+1))) {
				172	format->align = READ_spec(pos+1);
				173	format->fill_char = READ_spec(pos);
				174	align_specified = 1;
				175	pos += 2;
				176	}
				177	else if (end-pos >= 1 && is_alignment_token(READ_spec(pos))) {
				178	format->align = READ_spec(pos);
				179	align_specified = 1;
				180	++pos;
				181	}
				182
				183	/* Parse the various sign options */
				184	if (end-pos >= 1 && is_sign_element(READ_spec(pos))) {
				185	format->sign = READ_spec(pos);
				186	++pos;
				187	}
				188
				189	/* If the next character is #, we're in alternate mode. This only
				190	applies to integers. */
				191	if (end-pos >= 1 && READ_spec(pos) == '#') {
				192	format->alternate = 1;
				193	++pos;
				194	}
				195
				196	/* The special case for 0-padding (backwards compat) */
				197	if (format->fill_char == '\0' && end-pos >= 1 && READ_spec(pos) == '0') {
				198	format->fill_char = '0';
				199	if (!align_specified) {
				200	format->align = '=';
				201	}
				202	++pos;
				203	}
				204
				205	consumed = get_integer(format_spec, &pos, end, &format->width);
				206	if (consumed == -1)
				207	/* Overflow error. Exception already set. */
				208	return 0;
				209
				210	/* If consumed is 0, we didn't consume any characters for the
				211	width. In that case, reset the width to -1, because
				212	get_integer() will have set it to zero. -1 is how we record
				213	that the width wasn't specified. */
				214	if (consumed == 0)
				215	format->width = -1;
				216
				217	/* Comma signifies add thousands separators */
				218	if (end-pos && READ_spec(pos) == ',') {
				219	format->thousands_separators = 1;
				220	++pos;
				221	}
				222
				223	/* Parse field precision */
				224	if (end-pos && READ_spec(pos) == '.') {
				225	++pos;
				226
				227	consumed = get_integer(format_spec, &pos, end, &format->precision);
				228	if (consumed == -1)
				229	/* Overflow error. Exception already set. */
				230	return 0;
				231
				232	/* Not having a precision after a dot is an error. */
				233	if (consumed == 0) {
				234	PyErr_Format(PyExc_ValueError,
				235	"Format specifier missing precision");
				236	return 0;
				237	}
				238
				239	}
				240
				241	/* Finally, parse the type field. */
				242
				243	if (end-pos > 1) {
Eric V. Smith	d25cfe6	2012-01-19 20:04:28 -0500	[diff] [blame]	244	/* More than one char remain, invalid format specifier. */
				245	PyErr_Format(PyExc_ValueError, "Invalid format specifier");
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	246	return 0;
				247	}
				248
				249	if (end-pos == 1) {
				250	format->type = READ_spec(pos);
				251	++pos;
				252	}
				253
				254	/* Do as much validating as we can, just by looking at the format
				255	specifier. Do not take into account what type of formatting
				256	we're doing (int, float, string). */
				257
				258	if (format->thousands_separators) {
				259	switch (format->type) {
				260	case 'd':
				261	case 'e':
				262	case 'f':
				263	case 'g':
				264	case 'E':
				265	case 'G':
				266	case '%':
				267	case 'F':
				268	case '\0':
				269	/* These are allowed. See PEP 378.*/
				270	break;
				271	default:
				272	invalid_comma_type(format->type);
				273	return 0;
				274	}
				275	}
				276
Victor Stinner	a4ac600	2012-01-21 15:50:49 +0100	[diff] [blame]	277	assert (format->align <= 127);
				278	assert (format->sign <= 127);
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	279	return 1;
				280	}
				281
				282	/* Calculate the padding needed. */
				283	static void
				284	calc_padding(Py_ssize_t nchars, Py_ssize_t width, Py_UCS4 align,
				285	Py_ssize_t n_lpadding, Py_ssize_t n_rpadding,
				286	Py_ssize_t *n_total)
				287	{
				288	if (width >= 0) {
				289	if (nchars > width)
				290	*n_total = nchars;
				291	else
				292	*n_total = width;
				293	}
				294	else {
				295	/* not specified, use all of the chars and no more */
				296	*n_total = nchars;
				297	}
				298
				299	/* Figure out how much leading space we need, based on the
				300	aligning */
				301	if (align == '>')
				302	n_lpadding = n_total - nchars;
				303	else if (align == '^')
				304	n_lpadding = (n_total - nchars) / 2;
				305	else if (align == '<' \|\| align == '=')
				306	*n_lpadding = 0;
				307	else {
				308	/* We should never have an unspecified alignment. */
				309	*n_lpadding = 0;
				310	assert(0);
				311	}
				312
				313	n_rpadding = n_total - nchars - *n_lpadding;
				314	}
				315
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	316	/* Do the padding, and return a pointer to where the caller-supplied
				317	content goes. */
				318	static Py_ssize_t
				319	fill_padding(PyObject *s, Py_ssize_t start, Py_ssize_t nchars,
				320	Py_UCS4 fill_char, Py_ssize_t n_lpadding,
				321	Py_ssize_t n_rpadding)
				322	{
				323	/* Pad on left. */
				324	if (n_lpadding)
Victor Stinner	3fe5531	2012-01-04 00:33:50 +0100	[diff] [blame]	325	PyUnicode_Fill(s, start, start + n_lpadding, fill_char);
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	326
				327	/* Pad on right. */
				328	if (n_rpadding)
Victor Stinner	3fe5531	2012-01-04 00:33:50 +0100	[diff] [blame]	329	PyUnicode_Fill(s, start + nchars + n_lpadding,
				330	start + nchars + n_lpadding + n_rpadding, fill_char);
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	331
				332	/* Pointer to the user content. */
				333	return start + n_lpadding;
				334	}
				335
				336	/************************************************************************/
				337	/********* common routines for numeric formatting *******************/
				338	/************************************************************************/
				339
				340	/* Locale type codes. */
				341	#define LT_CURRENT_LOCALE 0
				342	#define LT_DEFAULT_LOCALE 1
				343	#define LT_NO_LOCALE 2
				344
				345	/* Locale info needed for formatting integers and the part of floats
				346	before and including the decimal. Note that locales only support
				347	8-bit chars, not unicode. */
				348	typedef struct {
				349	char *decimal_point;
				350	char *thousands_sep;
				351	char *grouping;
				352	} LocaleInfo;
				353
				354	/* describes the layout for an integer, see the comment in
				355	calc_number_widths() for details */
				356	typedef struct {
				357	Py_ssize_t n_lpadding;
				358	Py_ssize_t n_prefix;
				359	Py_ssize_t n_spadding;
				360	Py_ssize_t n_rpadding;
				361	char sign;
				362	Py_ssize_t n_sign; /* number of digits needed for sign (0/1) */
				363	Py_ssize_t n_grouped_digits; /* Space taken up by the digits, including
				364	any grouping chars. */
				365	Py_ssize_t n_decimal; /* 0 if only an integer */
				366	Py_ssize_t n_remainder; /* Digits in decimal and/or exponent part,
				367	excluding the decimal itself, if
				368	present. */
				369
				370	/* These 2 are not the widths of fields, but are needed by
				371	STRINGLIB_GROUPING. */
				372	Py_ssize_t n_digits; /* The number of digits before a decimal
				373	or exponent. */
				374	Py_ssize_t n_min_width; /* The min_width we used when we computed
				375	the n_grouped_digits width. */
				376	} NumberFieldWidths;
				377
				378
				379	/* Given a number of the form:
				380	digits[remainder]
				381	where ptr points to the start and end points to the end, find where
				382	the integer part ends. This could be a decimal, an exponent, both,
				383	or neither.
				384	If a decimal point is present, set *has_decimal and increment
				385	remainder beyond it.
				386	Results are undefined (but shouldn't crash) for improperly
				387	formatted strings.
				388	*/
				389	static void
				390	parse_number(PyObject *s, Py_ssize_t pos, Py_ssize_t end,
				391	Py_ssize_t n_remainder, int has_decimal)
				392	{
				393	Py_ssize_t remainder;
				394
				395	while (pos<end && isdigit(PyUnicode_READ_CHAR(s, pos)))
				396	++pos;
				397	remainder = pos;
				398
				399	/* Does remainder start with a decimal point? */
				400	*has_decimal = pos<end && PyUnicode_READ_CHAR(s, remainder) == '.';
				401
				402	/* Skip the decimal point. */
				403	if (*has_decimal)
				404	remainder++;
				405
				406	*n_remainder = end - remainder;
				407	}
				408
				409	/* not all fields of format are used. for example, precision is
				410	unused. should this take discrete params in order to be more clear
				411	about what it does? or is passing a single format parameter easier
				412	and more efficient enough to justify a little obfuscation? */
				413	static Py_ssize_t
				414	calc_number_widths(NumberFieldWidths *spec, Py_ssize_t n_prefix,
				415	Py_UCS4 sign_char, PyObject *number, Py_ssize_t n_start,
				416	Py_ssize_t n_end, Py_ssize_t n_remainder,
				417	int has_decimal, const LocaleInfo *locale,
				418	const InternalFormatSpec *format)
				419	{
				420	Py_ssize_t n_non_digit_non_padding;
				421	Py_ssize_t n_padding;
				422
				423	spec->n_digits = n_end - n_start - n_remainder - (has_decimal?1:0);
				424	spec->n_lpadding = 0;
				425	spec->n_prefix = n_prefix;
				426	spec->n_decimal = has_decimal ? strlen(locale->decimal_point) : 0;
				427	spec->n_remainder = n_remainder;
				428	spec->n_spadding = 0;
				429	spec->n_rpadding = 0;
				430	spec->sign = '\0';
				431	spec->n_sign = 0;
				432
				433	/* the output will look like:
				434	\| \|
				435	\| <lpadding> <sign> <prefix> <spadding> <grouped_digits> <decimal> <remainder> <rpadding> \|
				436	\| \|
				437
				438	sign is computed from format->sign and the actual
				439	sign of the number
				440
				441	prefix is given (it's for the '0x' prefix)
				442
				443	digits is already known
				444
				445	the total width is either given, or computed from the
				446	actual digits
				447
				448	only one of lpadding, spadding, and rpadding can be non-zero,
				449	and it's calculated from the width and other fields
				450	*/
				451
				452	/* compute the various parts we're going to write */
				453	switch (format->sign) {
				454	case '+':
				455	/* always put a + or - */
				456	spec->n_sign = 1;
				457	spec->sign = (sign_char == '-' ? '-' : '+');
				458	break;
				459	case ' ':
				460	spec->n_sign = 1;
				461	spec->sign = (sign_char == '-' ? '-' : ' ');
				462	break;
				463	default:
				464	/* Not specified, or the default (-) */
				465	if (sign_char == '-') {
				466	spec->n_sign = 1;
				467	spec->sign = '-';
				468	}
				469	}
				470
				471	/* The number of chars used for non-digits and non-padding. */
				472	n_non_digit_non_padding = spec->n_sign + spec->n_prefix + spec->n_decimal +
				473	spec->n_remainder;
				474
				475	/* min_width can go negative, that's okay. format->width == -1 means
				476	we don't care. */
				477	if (format->fill_char == '0' && format->align == '=')
				478	spec->n_min_width = format->width - n_non_digit_non_padding;
				479	else
				480	spec->n_min_width = 0;
				481
				482	if (spec->n_digits == 0)
				483	/* This case only occurs when using 'c' formatting, we need
				484	to special case it because the grouping code always wants
				485	to have at least one character. */
				486	spec->n_grouped_digits = 0;
				487	else
				488	spec->n_grouped_digits = _PyUnicode_InsertThousandsGrouping(
Victor Stinner	c3cec78	2011-10-05 21:24:08 +0200	[diff] [blame]	489	NULL, PyUnicode_1BYTE_KIND, NULL, 0, NULL,
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	490	spec->n_digits, spec->n_min_width,
				491	locale->grouping, locale->thousands_sep);
				492
				493	/* Given the desired width and the total of digit and non-digit
				494	space we consume, see if we need any padding. format->width can
				495	be negative (meaning no padding), but this code still works in
				496	that case. */
				497	n_padding = format->width -
				498	(n_non_digit_non_padding + spec->n_grouped_digits);
				499	if (n_padding > 0) {
				500	/* Some padding is needed. Determine if it's left, space, or right. */
				501	switch (format->align) {
				502	case '<':
				503	spec->n_rpadding = n_padding;
				504	break;
				505	case '^':
				506	spec->n_lpadding = n_padding / 2;
				507	spec->n_rpadding = n_padding - spec->n_lpadding;
				508	break;
				509	case '=':
				510	spec->n_spadding = n_padding;
				511	break;
				512	case '>':
				513	spec->n_lpadding = n_padding;
				514	break;
				515	default:
				516	/* Shouldn't get here, but treat it as '>' */
				517	spec->n_lpadding = n_padding;
				518	assert(0);
				519	break;
				520	}
				521	}
				522	return spec->n_lpadding + spec->n_sign + spec->n_prefix +
				523	spec->n_spadding + spec->n_grouped_digits + spec->n_decimal +
				524	spec->n_remainder + spec->n_rpadding;
				525	}
				526
				527	/* Fill in the digit parts of a numbers's string representation,
				528	as determined in calc_number_widths().
Victor Stinner	afbaa20	2011-09-28 21:50:16 +0200	[diff] [blame]	529	Return -1 on error, or 0 on success. */
				530	static int
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	531	fill_number(PyObject out, Py_ssize_t pos, const NumberFieldWidths spec,
				532	PyObject *digits, Py_ssize_t d_start, Py_ssize_t d_end,
Victor Stinner	afbaa20	2011-09-28 21:50:16 +0200	[diff] [blame]	533	PyObject *prefix, Py_ssize_t p_start,
				534	Py_UCS4 fill_char,
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	535	LocaleInfo *locale, int toupper)
				536	{
				537	/* Used to keep track of digits, decimal, and remainder. */
				538	Py_ssize_t d_pos = d_start;
				539	unsigned int kind = PyUnicode_KIND(out);
				540	void *data = PyUnicode_DATA(out);
				541
				542	#ifndef NDEBUG
				543	Py_ssize_t r;
				544	#endif
				545
				546	if (spec->n_lpadding) {
Victor Stinner	3fe5531	2012-01-04 00:33:50 +0100	[diff] [blame]	547	PyUnicode_Fill(out, pos, pos + spec->n_lpadding, fill_char);
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	548	pos += spec->n_lpadding;
				549	}
				550	if (spec->n_sign == 1) {
				551	PyUnicode_WRITE(kind, data, pos++, spec->sign);
				552	}
				553	if (spec->n_prefix) {
Victor Stinner	fd85c3a	2011-09-28 21:53:49 +0200	[diff] [blame]	554	if (PyUnicode_CopyCharacters(out, pos,
				555	prefix, p_start,
				556	spec->n_prefix) < 0)
				557	return -1;
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	558	if (toupper) {
				559	Py_ssize_t t;
Benjamin Peterson	21e0da2	2012-01-11 21:00:42 -0500	[diff] [blame]	560	for (t = 0; t < spec->n_prefix; t++) {
				561	Py_UCS4 c = PyUnicode_READ(kind, data, pos + t);
Victor Stinner	ed27785	2012-02-01 00:22:23 +0100	[diff] [blame^]	562	c = Py_TOUPPER(c);
Victor Stinner	a4ac600	2012-01-21 15:50:49 +0100	[diff] [blame]	563	assert (c <= 127);
Victor Stinner	ed27785	2012-02-01 00:22:23 +0100	[diff] [blame^]	564	PyUnicode_WRITE(kind, data, pos + t, c);
Benjamin Peterson	21e0da2	2012-01-11 21:00:42 -0500	[diff] [blame]	565	}
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	566	}
				567	pos += spec->n_prefix;
				568	}
				569	if (spec->n_spadding) {
Victor Stinner	3fe5531	2012-01-04 00:33:50 +0100	[diff] [blame]	570	PyUnicode_Fill(out, pos, pos + spec->n_spadding, fill_char);
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	571	pos += spec->n_spadding;
				572	}
				573
				574	/* Only for type 'c' special case, it has no digits. */
				575	if (spec->n_digits != 0) {
				576	/* Fill the digits with InsertThousandsGrouping. */
Victor Stinner	dba2dee	2011-09-28 21:50:42 +0200	[diff] [blame]	577	char *pdigits;
				578	if (PyUnicode_READY(digits))
				579	return -1;
				580	pdigits = PyUnicode_DATA(digits);
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	581	if (PyUnicode_KIND(digits) < kind) {
				582	pdigits = _PyUnicode_AsKind(digits, kind);
Victor Stinner	afbaa20	2011-09-28 21:50:16 +0200	[diff] [blame]	583	if (pdigits == NULL)
				584	return -1;
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	585	}
				586	#ifndef NDEBUG
				587	r =
				588	#endif
				589	_PyUnicode_InsertThousandsGrouping(
Victor Stinner	c3cec78	2011-10-05 21:24:08 +0200	[diff] [blame]	590	out, kind,
Martin v. Löwis	c47adb0	2011-10-07 20:55:35 +0200	[diff] [blame]	591	(char)data + kind pos,
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	592	spec->n_grouped_digits,
Martin v. Löwis	c47adb0	2011-10-07 20:55:35 +0200	[diff] [blame]	593	pdigits + kind * d_pos,
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	594	spec->n_digits, spec->n_min_width,
				595	locale->grouping, locale->thousands_sep);
				596	#ifndef NDEBUG
				597	assert(r == spec->n_grouped_digits);
				598	#endif
				599	if (PyUnicode_KIND(digits) < kind)
				600	PyMem_Free(pdigits);
				601	d_pos += spec->n_digits;
				602	}
				603	if (toupper) {
				604	Py_ssize_t t;
Benjamin Peterson	21e0da2	2012-01-11 21:00:42 -0500	[diff] [blame]	605	for (t = 0; t < spec->n_grouped_digits; t++) {
				606	Py_UCS4 c = PyUnicode_READ(kind, data, pos + t);
Victor Stinner	ed27785	2012-02-01 00:22:23 +0100	[diff] [blame^]	607	c = Py_TOUPPER(c);
Benjamin Peterson	21e0da2	2012-01-11 21:00:42 -0500	[diff] [blame]	608	if (c > 127) {
				609	PyErr_SetString(PyExc_SystemError, "non-ascii grouped digit");
				610	return -1;
				611	}
Victor Stinner	ed27785	2012-02-01 00:22:23 +0100	[diff] [blame^]	612	PyUnicode_WRITE(kind, data, pos + t, c);
Benjamin Peterson	21e0da2	2012-01-11 21:00:42 -0500	[diff] [blame]	613	}
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	614	}
				615	pos += spec->n_grouped_digits;
				616
				617	if (spec->n_decimal) {
				618	Py_ssize_t t;
				619	for (t = 0; t < spec->n_decimal; ++t)
				620	PyUnicode_WRITE(kind, data, pos + t,
				621	locale->decimal_point[t]);
				622	pos += spec->n_decimal;
				623	d_pos += 1;
				624	}
				625
				626	if (spec->n_remainder) {
Victor Stinner	fd85c3a	2011-09-28 21:53:49 +0200	[diff] [blame]	627	if (PyUnicode_CopyCharacters(out, pos, digits, d_pos, spec->n_remainder) < 0)
				628	return -1;
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	629	pos += spec->n_remainder;
				630	d_pos += spec->n_remainder;
				631	}
				632
				633	if (spec->n_rpadding) {
Victor Stinner	3fe5531	2012-01-04 00:33:50 +0100	[diff] [blame]	634	PyUnicode_Fill(out, pos, pos + spec->n_rpadding, fill_char);
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	635	pos += spec->n_rpadding;
				636	}
Victor Stinner	afbaa20	2011-09-28 21:50:16 +0200	[diff] [blame]	637	return 0;
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	638	}
				639
				640	static char no_grouping[1] = {CHAR_MAX};
				641
				642	/* Find the decimal point character(s?), thousands_separator(s?), and
				643	grouping description, either for the current locale if type is
				644	LT_CURRENT_LOCALE, a hard-coded locale if LT_DEFAULT_LOCALE, or
				645	none if LT_NO_LOCALE. */
				646	static void
				647	get_locale_info(int type, LocaleInfo *locale_info)
				648	{
				649	switch (type) {
				650	case LT_CURRENT_LOCALE: {
				651	struct lconv *locale_data = localeconv();
				652	locale_info->decimal_point = locale_data->decimal_point;
				653	locale_info->thousands_sep = locale_data->thousands_sep;
				654	locale_info->grouping = locale_data->grouping;
				655	break;
				656	}
				657	case LT_DEFAULT_LOCALE:
				658	locale_info->decimal_point = ".";
				659	locale_info->thousands_sep = ",";
				660	locale_info->grouping = "\3"; /* Group every 3 characters. The
				661	(implicit) trailing 0 means repeat
				662	infinitely. */
				663	break;
				664	case LT_NO_LOCALE:
				665	locale_info->decimal_point = ".";
				666	locale_info->thousands_sep = "";
				667	locale_info->grouping = no_grouping;
				668	break;
				669	default:
				670	assert(0);
				671	}
				672	}
				673
				674	/************************************************************************/
				675	/********* string formatting ****************************************/
				676	/************************************************************************/
				677
				678	static PyObject *
				679	format_string_internal(PyObject value, const InternalFormatSpec format)
				680	{
				681	Py_ssize_t lpad;
				682	Py_ssize_t rpad;
				683	Py_ssize_t total;
				684	Py_ssize_t pos;
Victor Stinner	c4f281e	2011-10-11 22:11:42 +0200	[diff] [blame]	685	Py_ssize_t len = PyUnicode_GET_LENGTH(value);
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	686	PyObject *result = NULL;
Amaury Forgeot d'Arc	cd27df3	2012-01-23 22:42:19 +0100	[diff] [blame]	687	Py_UCS4 maxchar = 127;
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	688
				689	/* sign is not allowed on strings */
				690	if (format->sign != '\0') {
				691	PyErr_SetString(PyExc_ValueError,
				692	"Sign not allowed in string format specifier");
				693	goto done;
				694	}
				695
				696	/* alternate is not allowed on strings */
				697	if (format->alternate) {
				698	PyErr_SetString(PyExc_ValueError,
				699	"Alternate form (#) not allowed in string format "
				700	"specifier");
				701	goto done;
				702	}
				703
				704	/* '=' alignment not allowed on strings */
				705	if (format->align == '=') {
				706	PyErr_SetString(PyExc_ValueError,
				707	"'=' alignment not allowed "
				708	"in string format specifier");
				709	goto done;
				710	}
				711
				712	/* if precision is specified, output no more that format.precision
				713	characters */
				714	if (format->precision >= 0 && len >= format->precision) {
				715	len = format->precision;
				716	}
				717
				718	calc_padding(len, format->width, format->align, &lpad, &rpad, &total);
				719
Victor Stinner	a4ac600	2012-01-21 15:50:49 +0100	[diff] [blame]	720	if (lpad != 0 \|\| rpad != 0)
				721	maxchar = Py_MAX(maxchar, format->fill_char);
				722
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	723	/* allocate the resulting string */
				724	result = PyUnicode_New(total, maxchar);
				725	if (result == NULL)
				726	goto done;
				727
				728	/* Write into that space. First the padding. */
				729	pos = fill_padding(result, 0, len,
				730	format->fill_char=='\0'?' ':format->fill_char,
				731	lpad, rpad);
				732
				733	/* Then the source string. */
Victor Stinner	fd85c3a	2011-09-28 21:53:49 +0200	[diff] [blame]	734	if (PyUnicode_CopyCharacters(result, pos, value, 0, len) < 0)
				735	Py_CLEAR(result);
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	736
				737	done:
Victor Stinner	ed27785	2012-02-01 00:22:23 +0100	[diff] [blame^]	738	assert(!result \|\| _PyUnicode_CheckConsistency(result, 1));
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	739	return result;
				740	}
				741
				742
				743	/************************************************************************/
				744	/********* long formatting ******************************************/
				745	/************************************************************************/
				746
				747	typedef PyObject*
				748	(IntOrLongToString)(PyObject value, int base);
				749
				750	static PyObject *
				751	format_int_or_long_internal(PyObject value, const InternalFormatSpec format,
				752	IntOrLongToString tostring)
				753	{
				754	PyObject *result = NULL;
Amaury Forgeot d'Arc	cd27df3	2012-01-23 22:42:19 +0100	[diff] [blame]	755	Py_UCS4 maxchar = 127;
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	756	PyObject *tmp = NULL;
				757	Py_ssize_t inumeric_chars;
				758	Py_UCS4 sign_char = '\0';
				759	Py_ssize_t n_digits; /* count of digits need from the computed
				760	string */
				761	Py_ssize_t n_remainder = 0; /* Used only for 'c' formatting, which
				762	produces non-digits */
				763	Py_ssize_t n_prefix = 0; /* Count of prefix chars, (e.g., '0x') */
				764	Py_ssize_t n_total;
Victor Stinner	ed27785	2012-02-01 00:22:23 +0100	[diff] [blame^]	765	Py_ssize_t prefix = 0;
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	766	NumberFieldWidths spec;
				767	long x;
Victor Stinner	afbaa20	2011-09-28 21:50:16 +0200	[diff] [blame]	768	int err;
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	769
				770	/* Locale settings, either from the actual locale or
				771	from a hard-code pseudo-locale */
				772	LocaleInfo locale;
				773
				774	/* no precision allowed on integers */
				775	if (format->precision != -1) {
				776	PyErr_SetString(PyExc_ValueError,
				777	"Precision not allowed in integer format specifier");
				778	goto done;
				779	}
				780
				781	/* special case for character formatting */
				782	if (format->type == 'c') {
				783	/* error to specify a sign */
				784	if (format->sign != '\0') {
				785	PyErr_SetString(PyExc_ValueError,
				786	"Sign not allowed with integer"
				787	" format specifier 'c'");
				788	goto done;
				789	}
				790
				791	/* taken from unicodeobject.c formatchar() */
				792	/* Integer input truncated to a character */
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	793	x = PyLong_AsLong(value);
				794	if (x == -1 && PyErr_Occurred())
				795	goto done;
				796	if (x < 0 \|\| x > 0x10ffff) {
				797	PyErr_SetString(PyExc_OverflowError,
Victor Stinner	a4ac600	2012-01-21 15:50:49 +0100	[diff] [blame]	798	"%c arg not in range(0x110000)");
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	799	goto done;
				800	}
				801	tmp = PyUnicode_FromOrdinal(x);
				802	inumeric_chars = 0;
				803	n_digits = 1;
Amaury Forgeot d'Arc	6d766fc	2012-01-23 23:20:43 +0100	[diff] [blame]	804	maxchar = Py_MAX(maxchar, (Py_UCS4)x);
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	805
				806	/* As a sort-of hack, we tell calc_number_widths that we only
				807	have "remainder" characters. calc_number_widths thinks
				808	these are characters that don't get formatted, only copied
				809	into the output string. We do this for 'c' formatting,
				810	because the characters are likely to be non-digits. */
				811	n_remainder = 1;
				812	}
				813	else {
				814	int base;
				815	int leading_chars_to_skip = 0; /* Number of characters added by
				816	PyNumber_ToBase that we want to
				817	skip over. */
				818
				819	/* Compute the base and how many characters will be added by
				820	PyNumber_ToBase */
				821	switch (format->type) {
				822	case 'b':
				823	base = 2;
				824	leading_chars_to_skip = 2; /* 0b */
				825	break;
				826	case 'o':
				827	base = 8;
				828	leading_chars_to_skip = 2; /* 0o */
				829	break;
				830	case 'x':
				831	case 'X':
				832	base = 16;
				833	leading_chars_to_skip = 2; /* 0x */
				834	break;
				835	default: /* shouldn't be needed, but stops a compiler warning */
				836	case 'd':
				837	case 'n':
				838	base = 10;
				839	break;
				840	}
				841
				842	/* The number of prefix chars is the same as the leading
				843	chars to skip */
				844	if (format->alternate)
				845	n_prefix = leading_chars_to_skip;
				846
				847	/* Do the hard part, converting to a string in a given base */
				848	tmp = tostring(value, base);
				849	if (tmp == NULL \|\| PyUnicode_READY(tmp) == -1)
				850	goto done;
				851
				852	inumeric_chars = 0;
				853	n_digits = PyUnicode_GET_LENGTH(tmp);
				854
				855	prefix = inumeric_chars;
				856
				857	/* Is a sign character present in the output? If so, remember it
				858	and skip it */
				859	if (PyUnicode_READ_CHAR(tmp, inumeric_chars) == '-') {
				860	sign_char = '-';
				861	++prefix;
				862	++leading_chars_to_skip;
				863	}
				864
				865	/* Skip over the leading chars (0x, 0b, etc.) */
				866	n_digits -= leading_chars_to_skip;
				867	inumeric_chars += leading_chars_to_skip;
				868	}
				869
				870	/* Determine the grouping, separator, and decimal point, if any. */
				871	get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
				872	(format->thousands_separators ?
				873	LT_DEFAULT_LOCALE :
				874	LT_NO_LOCALE),
				875	&locale);
				876
				877	/* Calculate how much memory we'll need. */
				878	n_total = calc_number_widths(&spec, n_prefix, sign_char, tmp, inumeric_chars,
				879	inumeric_chars + n_digits, n_remainder, 0, &locale, format);
				880
Victor Stinner	a4ac600	2012-01-21 15:50:49 +0100	[diff] [blame]	881	if (spec.n_lpadding \|\| spec.n_spadding \|\| spec.n_rpadding)
				882	maxchar = Py_MAX(maxchar, format->fill_char);
				883
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	884	/* Allocate the memory. */
				885	result = PyUnicode_New(n_total, maxchar);
				886	if (!result)
				887	goto done;
				888
				889	/* Populate the memory. */
Victor Stinner	afbaa20	2011-09-28 21:50:16 +0200	[diff] [blame]	890	err = fill_number(result, 0, &spec,
				891	tmp, inumeric_chars, inumeric_chars + n_digits,
				892	tmp, prefix,
				893	format->fill_char == '\0' ? ' ' : format->fill_char,
				894	&locale, format->type == 'X');
				895	if (err)
				896	Py_CLEAR(result);
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	897
				898	done:
				899	Py_XDECREF(tmp);
Victor Stinner	ed27785	2012-02-01 00:22:23 +0100	[diff] [blame^]	900	assert(!result \|\| _PyUnicode_CheckConsistency(result, 1));
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	901	return result;
				902	}
				903
				904	/************************************************************************/
				905	/********* float formatting *****************************************/
				906	/************************************************************************/
				907
				908	static PyObject*
				909	strtounicode(char *charbuffer, Py_ssize_t len)
				910	{
				911	return PyUnicode_FromKindAndData(PyUnicode_1BYTE_KIND, charbuffer, len);
				912	}
				913
				914	/* much of this is taken from unicodeobject.c */
				915	static PyObject *
				916	format_float_internal(PyObject *value,
				917	const InternalFormatSpec *format)
				918	{
				919	char buf = NULL; / buffer returned from PyOS_double_to_string */
				920	Py_ssize_t n_digits;
				921	Py_ssize_t n_remainder;
				922	Py_ssize_t n_total;
				923	int has_decimal;
				924	double val;
				925	Py_ssize_t precision = format->precision;
				926	Py_ssize_t default_precision = 6;
				927	Py_UCS4 type = format->type;
				928	int add_pct = 0;
				929	Py_ssize_t index;
				930	NumberFieldWidths spec;
				931	int flags = 0;
				932	PyObject *result = NULL;
Amaury Forgeot d'Arc	cd27df3	2012-01-23 22:42:19 +0100	[diff] [blame]	933	Py_UCS4 maxchar = 127;
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	934	Py_UCS4 sign_char = '\0';
				935	int float_type; /* Used to see if we have a nan, inf, or regular float. */
				936	PyObject *unicode_tmp = NULL;
Victor Stinner	afbaa20	2011-09-28 21:50:16 +0200	[diff] [blame]	937	int err;
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	938
				939	/* Locale settings, either from the actual locale or
				940	from a hard-code pseudo-locale */
				941	LocaleInfo locale;
				942
				943	if (format->alternate)
				944	flags \|= Py_DTSF_ALT;
				945
				946	if (type == '\0') {
				947	/* Omitted type specifier. Behaves in the same way as repr(x)
				948	and str(x) if no precision is given, else like 'g', but with
				949	at least one digit after the decimal point. */
				950	flags \|= Py_DTSF_ADD_DOT_0;
				951	type = 'r';
				952	default_precision = 0;
				953	}
				954
				955	if (type == 'n')
				956	/* 'n' is the same as 'g', except for the locale used to
				957	format the result. We take care of that later. */
				958	type = 'g';
				959
				960	val = PyFloat_AsDouble(value);
				961	if (val == -1.0 && PyErr_Occurred())
				962	goto done;
				963
				964	if (type == '%') {
				965	type = 'f';
				966	val *= 100;
				967	add_pct = 1;
				968	}
				969
				970	if (precision < 0)
				971	precision = default_precision;
				972	else if (type == 'r')
				973	type = 'g';
				974
				975	/* Cast "type", because if we're in unicode we need to pass a
				976	8-bit char. This is safe, because we've restricted what "type"
				977	can be. */
				978	buf = PyOS_double_to_string(val, (char)type, precision, flags,
				979	&float_type);
				980	if (buf == NULL)
				981	goto done;
				982	n_digits = strlen(buf);
				983
				984	if (add_pct) {
				985	/* We know that buf has a trailing zero (since we just called
				986	strlen() on it), and we don't use that fact any more. So we
				987	can just write over the trailing zero. */
				988	buf[n_digits] = '%';
				989	n_digits += 1;
				990	}
				991
				992	/* Since there is no unicode version of PyOS_double_to_string,
				993	just use the 8 bit version and then convert to unicode. */
				994	unicode_tmp = strtounicode(buf, n_digits);
				995	if (unicode_tmp == NULL)
				996	goto done;
				997	index = 0;
				998
				999	/* Is a sign character present in the output? If so, remember it
				1000	and skip it */
				1001	if (PyUnicode_READ_CHAR(unicode_tmp, index) == '-') {
				1002	sign_char = '-';
				1003	++index;
				1004	--n_digits;
				1005	}
				1006
				1007	/* Determine if we have any "remainder" (after the digits, might include
				1008	decimal or exponent or both (or neither)) */
				1009	parse_number(unicode_tmp, index, index + n_digits, &n_remainder, &has_decimal);
				1010
				1011	/* Determine the grouping, separator, and decimal point, if any. */
				1012	get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
				1013	(format->thousands_separators ?
				1014	LT_DEFAULT_LOCALE :
				1015	LT_NO_LOCALE),
				1016	&locale);
				1017
				1018	/* Calculate how much memory we'll need. */
Victor Stinner	afbaa20	2011-09-28 21:50:16 +0200	[diff] [blame]	1019	n_total = calc_number_widths(&spec, 0, sign_char, unicode_tmp, index,
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	1020	index + n_digits, n_remainder, has_decimal,
				1021	&locale, format);
				1022
Victor Stinner	a4ac600	2012-01-21 15:50:49 +0100	[diff] [blame]	1023	if (spec.n_lpadding \|\| spec.n_spadding \|\| spec.n_rpadding)
				1024	maxchar = Py_MAX(maxchar, format->fill_char);
				1025
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	1026	/* Allocate the memory. */
				1027	result = PyUnicode_New(n_total, maxchar);
				1028	if (result == NULL)
				1029	goto done;
				1030
				1031	/* Populate the memory. */
Victor Stinner	afbaa20	2011-09-28 21:50:16 +0200	[diff] [blame]	1032	err = fill_number(result, 0, &spec,
				1033	unicode_tmp, index, index + n_digits,
				1034	NULL, 0,
				1035	format->fill_char == '\0' ? ' ' : format->fill_char,
				1036	&locale, 0);
				1037	if (err)
				1038	Py_CLEAR(result);
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	1039
				1040	done:
				1041	PyMem_Free(buf);
				1042	Py_DECREF(unicode_tmp);
Victor Stinner	ed27785	2012-02-01 00:22:23 +0100	[diff] [blame^]	1043	assert(!result \|\| _PyUnicode_CheckConsistency(result, 1));
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	1044	return result;
				1045	}
				1046
				1047	/************************************************************************/
				1048	/********* complex formatting ***************************************/
				1049	/************************************************************************/
				1050
				1051	static PyObject *
				1052	format_complex_internal(PyObject *value,
				1053	const InternalFormatSpec *format)
				1054	{
				1055	double re;
				1056	double im;
				1057	char re_buf = NULL; / buffer returned from PyOS_double_to_string */
				1058	char im_buf = NULL; / buffer returned from PyOS_double_to_string */
				1059
				1060	InternalFormatSpec tmp_format = *format;
				1061	Py_ssize_t n_re_digits;
				1062	Py_ssize_t n_im_digits;
				1063	Py_ssize_t n_re_remainder;
				1064	Py_ssize_t n_im_remainder;
				1065	Py_ssize_t n_re_total;
				1066	Py_ssize_t n_im_total;
				1067	int re_has_decimal;
				1068	int im_has_decimal;
				1069	Py_ssize_t precision = format->precision;
				1070	Py_ssize_t default_precision = 6;
				1071	Py_UCS4 type = format->type;
				1072	Py_ssize_t i_re;
				1073	Py_ssize_t i_im;
				1074	NumberFieldWidths re_spec;
				1075	NumberFieldWidths im_spec;
				1076	int flags = 0;
				1077	PyObject *result = NULL;
Amaury Forgeot d'Arc	cd27df3	2012-01-23 22:42:19 +0100	[diff] [blame]	1078	Py_UCS4 maxchar = 127;
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	1079	int rkind;
				1080	void *rdata;
				1081	Py_ssize_t index;
				1082	Py_UCS4 re_sign_char = '\0';
				1083	Py_UCS4 im_sign_char = '\0';
				1084	int re_float_type; /* Used to see if we have a nan, inf, or regular float. */
				1085	int im_float_type;
				1086	int add_parens = 0;
				1087	int skip_re = 0;
				1088	Py_ssize_t lpad;
				1089	Py_ssize_t rpad;
				1090	Py_ssize_t total;
				1091	PyObject *re_unicode_tmp = NULL;
				1092	PyObject *im_unicode_tmp = NULL;
Victor Stinner	afbaa20	2011-09-28 21:50:16 +0200	[diff] [blame]	1093	int err;
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	1094
				1095	/* Locale settings, either from the actual locale or
				1096	from a hard-code pseudo-locale */
				1097	LocaleInfo locale;
				1098
				1099	/* Zero padding is not allowed. */
				1100	if (format->fill_char == '0') {
				1101	PyErr_SetString(PyExc_ValueError,
				1102	"Zero padding is not allowed in complex format "
				1103	"specifier");
				1104	goto done;
				1105	}
				1106
				1107	/* Neither is '=' alignment . */
				1108	if (format->align == '=') {
				1109	PyErr_SetString(PyExc_ValueError,
				1110	"'=' alignment flag is not allowed in complex format "
				1111	"specifier");
				1112	goto done;
				1113	}
				1114
				1115	re = PyComplex_RealAsDouble(value);
				1116	if (re == -1.0 && PyErr_Occurred())
				1117	goto done;
				1118	im = PyComplex_ImagAsDouble(value);
				1119	if (im == -1.0 && PyErr_Occurred())
				1120	goto done;
				1121
				1122	if (format->alternate)
				1123	flags \|= Py_DTSF_ALT;
				1124
				1125	if (type == '\0') {
				1126	/* Omitted type specifier. Should be like str(self). */
				1127	type = 'r';
				1128	default_precision = 0;
				1129	if (re == 0.0 && copysign(1.0, re) == 1.0)
				1130	skip_re = 1;
				1131	else
				1132	add_parens = 1;
				1133	}
				1134
				1135	if (type == 'n')
				1136	/* 'n' is the same as 'g', except for the locale used to
				1137	format the result. We take care of that later. */
				1138	type = 'g';
				1139
				1140	if (precision < 0)
				1141	precision = default_precision;
				1142	else if (type == 'r')
				1143	type = 'g';
				1144
				1145	/* Cast "type", because if we're in unicode we need to pass a
				1146	8-bit char. This is safe, because we've restricted what "type"
				1147	can be. */
				1148	re_buf = PyOS_double_to_string(re, (char)type, precision, flags,
				1149	&re_float_type);
				1150	if (re_buf == NULL)
				1151	goto done;
				1152	im_buf = PyOS_double_to_string(im, (char)type, precision, flags,
				1153	&im_float_type);
				1154	if (im_buf == NULL)
				1155	goto done;
				1156
				1157	n_re_digits = strlen(re_buf);
				1158	n_im_digits = strlen(im_buf);
				1159
				1160	/* Since there is no unicode version of PyOS_double_to_string,
				1161	just use the 8 bit version and then convert to unicode. */
				1162	re_unicode_tmp = strtounicode(re_buf, n_re_digits);
				1163	if (re_unicode_tmp == NULL)
				1164	goto done;
				1165	i_re = 0;
				1166
				1167	im_unicode_tmp = strtounicode(im_buf, n_im_digits);
				1168	if (im_unicode_tmp == NULL)
				1169	goto done;
				1170	i_im = 0;
				1171
				1172	/* Is a sign character present in the output? If so, remember it
				1173	and skip it */
				1174	if (PyUnicode_READ_CHAR(re_unicode_tmp, i_re) == '-') {
				1175	re_sign_char = '-';
				1176	++i_re;
				1177	--n_re_digits;
				1178	}
				1179	if (PyUnicode_READ_CHAR(im_unicode_tmp, i_im) == '-') {
				1180	im_sign_char = '-';
				1181	++i_im;
				1182	--n_im_digits;
				1183	}
				1184
				1185	/* Determine if we have any "remainder" (after the digits, might include
				1186	decimal or exponent or both (or neither)) */
Victor Stinner	afbaa20	2011-09-28 21:50:16 +0200	[diff] [blame]	1187	parse_number(re_unicode_tmp, i_re, i_re + n_re_digits,
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	1188	&n_re_remainder, &re_has_decimal);
Victor Stinner	afbaa20	2011-09-28 21:50:16 +0200	[diff] [blame]	1189	parse_number(im_unicode_tmp, i_im, i_im + n_im_digits,
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	1190	&n_im_remainder, &im_has_decimal);
				1191
				1192	/* Determine the grouping, separator, and decimal point, if any. */
				1193	get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
				1194	(format->thousands_separators ?
				1195	LT_DEFAULT_LOCALE :
				1196	LT_NO_LOCALE),
				1197	&locale);
				1198
				1199	/* Turn off any padding. We'll do it later after we've composed
				1200	the numbers without padding. */
				1201	tmp_format.fill_char = '\0';
				1202	tmp_format.align = '<';
				1203	tmp_format.width = -1;
				1204
				1205	/* Calculate how much memory we'll need. */
				1206	n_re_total = calc_number_widths(&re_spec, 0, re_sign_char, re_unicode_tmp,
				1207	i_re, i_re + n_re_digits, n_re_remainder,
				1208	re_has_decimal, &locale, &tmp_format);
				1209
				1210	/* Same formatting, but always include a sign, unless the real part is
				1211	* going to be omitted, in which case we use whatever sign convention was
				1212	* requested by the original format. */
				1213	if (!skip_re)
				1214	tmp_format.sign = '+';
				1215	n_im_total = calc_number_widths(&im_spec, 0, im_sign_char, im_unicode_tmp,
				1216	i_im, i_im + n_im_digits, n_im_remainder,
				1217	im_has_decimal, &locale, &tmp_format);
				1218
				1219	if (skip_re)
				1220	n_re_total = 0;
				1221
				1222	/* Add 1 for the 'j', and optionally 2 for parens. */
				1223	calc_padding(n_re_total + n_im_total + 1 + add_parens * 2,
				1224	format->width, format->align, &lpad, &rpad, &total);
				1225
Victor Stinner	a4ac600	2012-01-21 15:50:49 +0100	[diff] [blame]	1226	if (re_spec.n_lpadding \|\| re_spec.n_spadding \|\| re_spec.n_rpadding
				1227	\|\| im_spec.n_lpadding \|\| im_spec.n_spadding \|\| im_spec.n_rpadding
				1228	\|\| lpad \|\| rpad)
				1229	maxchar = Py_MAX(maxchar, format->fill_char);
				1230
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	1231	result = PyUnicode_New(total, maxchar);
				1232	if (result == NULL)
				1233	goto done;
				1234	rkind = PyUnicode_KIND(result);
				1235	rdata = PyUnicode_DATA(result);
				1236
				1237	/* Populate the memory. First, the padding. */
				1238	index = fill_padding(result, 0,
				1239	n_re_total + n_im_total + 1 + add_parens * 2,
				1240	format->fill_char=='\0' ? ' ' : format->fill_char,
				1241	lpad, rpad);
				1242
				1243	if (add_parens)
				1244	PyUnicode_WRITE(rkind, rdata, index++, '(');
				1245
				1246	if (!skip_re) {
Victor Stinner	afbaa20	2011-09-28 21:50:16 +0200	[diff] [blame]	1247	err = fill_number(result, index, &re_spec,
				1248	re_unicode_tmp, i_re, i_re + n_re_digits,
				1249	NULL, 0,
				1250	0,
				1251	&locale, 0);
				1252	if (err) {
				1253	Py_CLEAR(result);
				1254	goto done;
				1255	}
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	1256	index += n_re_total;
				1257	}
Victor Stinner	afbaa20	2011-09-28 21:50:16 +0200	[diff] [blame]	1258	err = fill_number(result, index, &im_spec,
				1259	im_unicode_tmp, i_im, i_im + n_im_digits,
				1260	NULL, 0,
				1261	0,
				1262	&locale, 0);
				1263	if (err) {
				1264	Py_CLEAR(result);
				1265	goto done;
				1266	}
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	1267	index += n_im_total;
				1268	PyUnicode_WRITE(rkind, rdata, index++, 'j');
				1269
				1270	if (add_parens)
				1271	PyUnicode_WRITE(rkind, rdata, index++, ')');
				1272
				1273	done:
				1274	PyMem_Free(re_buf);
				1275	PyMem_Free(im_buf);
				1276	Py_XDECREF(re_unicode_tmp);
				1277	Py_XDECREF(im_unicode_tmp);
Victor Stinner	ed27785	2012-02-01 00:22:23 +0100	[diff] [blame^]	1278	assert(!result \|\| _PyUnicode_CheckConsistency(result, 1));
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	1279	return result;
				1280	}
				1281
				1282	/************************************************************************/
				1283	/********* built in formatters **************************************/
				1284	/************************************************************************/
				1285	PyObject *
				1286	_PyUnicode_FormatAdvanced(PyObject *obj,
				1287	PyObject *format_spec,
				1288	Py_ssize_t start, Py_ssize_t end)
				1289	{
				1290	InternalFormatSpec format;
Victor Stinner	fb9ea8c	2011-10-06 01:45:57 +0200	[diff] [blame]	1291	PyObject *result;
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	1292
				1293	/* check for the special case of zero length format spec, make
				1294	it equivalent to str(obj) */
Victor Stinner	fb9ea8c	2011-10-06 01:45:57 +0200	[diff] [blame]	1295	if (start == end)
				1296	return PyObject_Str(obj);
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	1297
				1298	/* parse the format_spec */
				1299	if (!parse_internal_render_format_spec(format_spec, start, end,
				1300	&format, 's', '<'))
Victor Stinner	fb9ea8c	2011-10-06 01:45:57 +0200	[diff] [blame]	1301	return NULL;
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	1302
				1303	/* type conversion? */
				1304	switch (format.type) {
				1305	case 's':
				1306	/* no type conversion needed, already a string. do the formatting */
				1307	result = format_string_internal(obj, &format);
Victor Stinner	fb9ea8c	2011-10-06 01:45:57 +0200	[diff] [blame]	1308	if (result != NULL)
				1309	assert(_PyUnicode_CheckConsistency(result, 1));
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	1310	break;
				1311	default:
				1312	/* unknown */
				1313	unknown_presentation_type(format.type, obj->ob_type->tp_name);
Victor Stinner	fb9ea8c	2011-10-06 01:45:57 +0200	[diff] [blame]	1314	result = NULL;
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	1315	}
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	1316	return result;
				1317	}
				1318
				1319	static PyObject*
				1320	format_int_or_long(PyObject* obj, PyObject* format_spec,
				1321	Py_ssize_t start, Py_ssize_t end,
				1322	IntOrLongToString tostring)
				1323	{
				1324	PyObject *result = NULL;
				1325	PyObject *tmp = NULL;
				1326	InternalFormatSpec format;
				1327
				1328	/* check for the special case of zero length format spec, make
				1329	it equivalent to str(obj) */
				1330	if (start == end) {
				1331	result = PyObject_Str(obj);
				1332	goto done;
				1333	}
				1334
				1335	/* parse the format_spec */
				1336	if (!parse_internal_render_format_spec(format_spec, start, end,
				1337	&format, 'd', '>'))
				1338	goto done;
				1339
				1340	/* type conversion? */
				1341	switch (format.type) {
				1342	case 'b':
				1343	case 'c':
				1344	case 'd':
				1345	case 'o':
				1346	case 'x':
				1347	case 'X':
				1348	case 'n':
				1349	/* no type conversion needed, already an int (or long). do
				1350	the formatting */
				1351	result = format_int_or_long_internal(obj, &format, tostring);
				1352	break;
				1353
				1354	case 'e':
				1355	case 'E':
				1356	case 'f':
				1357	case 'F':
				1358	case 'g':
				1359	case 'G':
				1360	case '%':
				1361	/* convert to float */
				1362	tmp = PyNumber_Float(obj);
				1363	if (tmp == NULL)
				1364	goto done;
				1365	result = format_float_internal(tmp, &format);
				1366	break;
				1367
				1368	default:
				1369	/* unknown */
				1370	unknown_presentation_type(format.type, obj->ob_type->tp_name);
				1371	goto done;
				1372	}
				1373
				1374	done:
				1375	Py_XDECREF(tmp);
				1376	return result;
				1377	}
				1378
				1379	/* Need to define long_format as a function that will convert a long
				1380	to a string. In 3.0, _PyLong_Format has the correct signature. */
				1381	#define long_format _PyLong_Format
				1382
				1383	PyObject *
				1384	_PyLong_FormatAdvanced(PyObject *obj,
				1385	PyObject *format_spec,
				1386	Py_ssize_t start, Py_ssize_t end)
				1387	{
				1388	return format_int_or_long(obj, format_spec, start, end,
				1389	long_format);
				1390	}
				1391
				1392	PyObject *
				1393	_PyFloat_FormatAdvanced(PyObject *obj,
				1394	PyObject *format_spec,
				1395	Py_ssize_t start, Py_ssize_t end)
				1396	{
				1397	PyObject *result = NULL;
				1398	InternalFormatSpec format;
				1399
				1400	/* check for the special case of zero length format spec, make
				1401	it equivalent to str(obj) */
				1402	if (start == end) {
				1403	result = PyObject_Str(obj);
				1404	goto done;
				1405	}
				1406
				1407	/* parse the format_spec */
				1408	if (!parse_internal_render_format_spec(format_spec, start, end,
				1409	&format, '\0', '>'))
				1410	goto done;
				1411
				1412	/* type conversion? */
				1413	switch (format.type) {
				1414	case '\0': /* No format code: like 'g', but with at least one decimal. */
				1415	case 'e':
				1416	case 'E':
				1417	case 'f':
				1418	case 'F':
				1419	case 'g':
				1420	case 'G':
				1421	case 'n':
				1422	case '%':
				1423	/* no conversion, already a float. do the formatting */
				1424	result = format_float_internal(obj, &format);
				1425	break;
				1426
				1427	default:
				1428	/* unknown */
				1429	unknown_presentation_type(format.type, obj->ob_type->tp_name);
				1430	goto done;
				1431	}
				1432
				1433	done:
				1434	return result;
				1435	}
				1436
				1437	PyObject *
				1438	_PyComplex_FormatAdvanced(PyObject *obj,
				1439	PyObject *format_spec,
				1440	Py_ssize_t start, Py_ssize_t end)
				1441	{
				1442	PyObject *result = NULL;
				1443	InternalFormatSpec format;
				1444
				1445	/* check for the special case of zero length format spec, make
				1446	it equivalent to str(obj) */
				1447	if (start == end) {
				1448	result = PyObject_Str(obj);
				1449	goto done;
				1450	}
				1451
				1452	/* parse the format_spec */
				1453	if (!parse_internal_render_format_spec(format_spec, start, end,
				1454	&format, '\0', '>'))
				1455	goto done;
				1456
				1457	/* type conversion? */
				1458	switch (format.type) {
				1459	case '\0': /* No format code: like 'g', but with at least one decimal. */
				1460	case 'e':
				1461	case 'E':
				1462	case 'f':
				1463	case 'F':
				1464	case 'g':
				1465	case 'G':
				1466	case 'n':
				1467	/* no conversion, already a complex. do the formatting */
				1468	result = format_complex_internal(obj, &format);
				1469	break;
				1470
				1471	default:
				1472	/* unknown */
				1473	unknown_presentation_type(format.type, obj->ob_type->tp_name);
				1474	goto done;
				1475	}
				1476
				1477	done:
				1478	return result;
				1479	}