Blame - useful/fp_80_64.c - platform/external/valgrind

blob: 954a9c9e3cc1c074f76a2d9713bca47de64ef8fc [file] [log] [blame]

sewardj	cbe8efa	2004-09-06 14:57:52 +0000	[diff] [blame]	1
sewardj	07e0962	2004-09-06 20:39:20 +0000	[diff] [blame^]	2	#ifndef USED_AS_INCLUDE
				3
sewardj	cbe8efa	2004-09-06 14:57:52 +0000	[diff] [blame]	4	#include "../pub/libvex_basictypes.h"
				5	#include <stdio.h>
				6	#include <malloc.h>
				7	#include <stdlib.h>
				8	#include <string.h>
				9
				10
				11	/* Test program for developing code for conversions between
				12	x87 64-bit and 80-bit floats.
				13
				14	80-bit format exists only for x86/x86-64, and so the routines
				15	hardwire it as little-endian. The 64-bit format (IEEE double)
				16	could exist on any platform, little or big-endian and so we
				17	have to take that into account. IOW, these routines have to
				18	work correctly when compiled on both big- and little-endian
				19	targets, but the 80-bit images only ever have to exist in
				20	little-endian format.
				21	*/
				22
				23	static
				24	UInt read_bit_array ( UChar* arr, UInt n )
				25	{
				26	UChar c = arr[n >> 3];
				27	c >>= (n&7);
				28	return c & 1;
				29	}
				30
				31
				32	static void convert_f80le_to_f64le_HW ( /IN/UChar* f80, /OUT/UChar* f64 )
				33	{
				34	asm volatile ("ffree %%st(7); fldt (%0); fstpl (%1)"
				35	:
				36	: "r" (&f80[0]), "r" (&f64[0])
				37	: "memory" );
				38	}
				39
				40	static void convert_f64le_to_f80le_HW ( /IN/UChar* f64, /OUT/UChar* f80 )
				41	{
				42	asm volatile ("ffree %%st(7); fldl (%0); fstpt (%1)"
				43	:
				44	: "r" (&f64[0]), "r" (&f80[0])
				45	: "memory" );
				46	}
				47
sewardj	07e0962	2004-09-06 20:39:20 +0000	[diff] [blame^]	48	#endif /* ndef USED_AS_INCLUDE */
				49
sewardj	cbe8efa	2004-09-06 14:57:52 +0000	[diff] [blame]	50	/* 80 and 64-bit floating point formats:
				51
				52	80-bit:
				53
				54	S 0 0-------0 zero
				55	S 0 0X------X denormals
				56	S 1-7FFE 1X------X normals (all normals have leading 1)
				57	S 7FFF 10------0 infinity
				58	S 7FFF 10X-----X snan
				59	S 7FFF 11X-----X qnan
				60
				61	S is the sign bit. For runs X----X, at least one of the Xs must be
				62	nonzero. Exponent is 15 bits, fractional part is 63 bits, and
				63	there is an explicitly represented leading 1, and a sign bit,
				64	giving 80 in total.
				65
				66	64-bit avoids the confusion of an explicitly represented leading 1
				67	and so is simpler:
				68
				69	S 0 0------0 zero
				70	S 0 X------X denormals
				71	S 1-7FE any normals
				72	S 7FF 0------0 infinity
				73	S 7FF 0X-----X snan
				74	S 7FF 1X-----X qnan
				75
				76	Exponent is 11 bits, fractional part is 52 bits, and there is a
				77	sign bit, giving 64 in total.
				78	*/
				79
				80
				81	/* Convert a IEEE754 double (64-bit) into an x87 extended double
				82	(80-bit), mimicing the hardware fairly closely. Both numbers are
				83	stored little-endian. Limitations, all of which could be fixed,
				84	given some level of hassle:
				85
				86	* Does not handle double precision denormals. As a result, values
				87	with magnitudes less than 1e-308 are flushed to zero when they
				88	need not be.
				89
				90	* Identity of NaNs is not preserved.
				91
				92	See comments in the code for more details.
				93	*/
				94	static void convert_f64le_to_f80le ( /IN/UChar* f64, /OUT/UChar* f80 )
				95	{
				96	Bool isInf;
				97	Int bexp;
				98	UChar sign;
				99
				100	sign = (f64[7] >> 7) & 1;
				101	bexp = (f64[7] << 4) \| ((f64[6] >> 4) & 0x0F);
				102	bexp &= 0x7FF;
				103
				104	/* If the exponent is zero, either we have a zero or a denormal.
				105	Produce a zero. This is a hack in that it forces denormals to
				106	zero. Could do better. */
				107	if (bexp == 0) {
				108	f80[9] = sign << 7;
				109	f80[8] = f80[7] = f80[6] = f80[5] = f80[4]
				110	= f80[3] = f80[2] = f80[1] = f80[0] = 0;
				111	return;
				112	}
				113
				114	/* If the exponent is 7FF, this is either an Infinity, a SNaN or
				115	QNaN, as determined by examining bits 51:0, thus:
				116	0 ... 0 Inf
				117	0X ... X SNaN
				118	1X ... X QNaN
				119	where at least one of the Xs is not zero.
				120	*/
				121	if (bexp == 0x7FF) {
				122	isInf = (f64[6] & 0x0F) == 0
				123	&& f64[5] == 0 && f64[4] == 0 && f64[3] == 0
				124	&& f64[2] == 0 && f64[1] == 0 && f64[0] == 0;
				125	if (isInf) {
				126	/* Produce an appropriately signed infinity:
				127	S 1--1 (15) 1 0--0 (63)
				128	*/
				129	f80[9] = (sign << 7) \| 0x7F;
				130	f80[8] = 0xFF;
				131	f80[7] = 0x80;
				132	f80[6] = f80[5] = f80[4] = f80[3]
				133	= f80[2] = f80[1] = f80[0] = 0;
				134	return;
				135	}
				136	/* So it's either a QNaN or SNaN. Distinguish by considering
				137	bit 51. Note, this destroys all the trailing bits
				138	(identity?) of the NaN. IEEE754 doesn't require preserving
				139	these (it only requires that there be one QNaN value and one
				140	SNaN value), but x87 does seem to have some ability to
				141	preserve them. Anyway, here, the NaN's identity is
				142	destroyed. Could be improved. */
				143	if (f64[6] & 8) {
				144	/* QNaN. Make a QNaN:
				145	S 1--1 (15) 1 1--1 (63)
				146	*/
				147	f80[9] = (sign << 7) \| 0x7F;
				148	f80[8] = 0xFF;
				149	f80[7] = 0xFF;
				150	f80[6] = f80[5] = f80[4] = f80[3]
				151	= f80[2] = f80[1] = f80[0] = 0xFF;
				152	} else {
				153	/* SNaN. Make a SNaN:
				154	S 1--1 (15) 0 1--1 (63)
				155	*/
				156	f80[9] = (sign << 7) \| 0x7F;
				157	f80[8] = 0xFF;
				158	f80[7] = 0x7F;
				159	f80[6] = f80[5] = f80[4] = f80[3]
				160	= f80[2] = f80[1] = f80[0] = 0xFF;
				161	}
				162	return;
				163	}
				164
				165	/* It's not a zero, denormal, infinity or nan. So it must be a
				166	normalised number. Rebias the exponent and build the new
				167	number. */
				168	bexp += (16383 - 1023);
				169
				170	f80[9] = (sign << 7) \| ((bexp >> 8) & 0xFF);
				171	f80[8] = bexp & 0xFF;
				172	f80[7] = (1 << 7) \| ((f64[6] << 3) & 0x78) \| ((f64[5] >> 5) & 7);
				173	f80[6] = ((f64[5] << 3) & 0xF8) \| ((f64[4] >> 5) & 7);
				174	f80[5] = ((f64[4] << 3) & 0xF8) \| ((f64[3] >> 5) & 7);
				175	f80[4] = ((f64[3] << 3) & 0xF8) \| ((f64[2] >> 5) & 7);
				176	f80[3] = ((f64[2] << 3) & 0xF8) \| ((f64[1] >> 5) & 7);
				177	f80[2] = ((f64[1] << 3) & 0xF8) \| ((f64[0] >> 5) & 7);
				178	f80[1] = ((f64[0] << 3) & 0xF8);
				179	f80[0] = 0;
				180	}
				181
				182
				183	/////////////////////////////////////////////////////////////////
				184
				185	/* Convert a x87 extended double (80-bit) into an IEEE 754 double
				186	(64-bit), mimicing the hardware fairly closely. Both numbers are
				187	stored little-endian. Limitations, all of which could be fixed,
				188	given some level of hassle:
				189
				190	* Does not create double precision denormals. As a result, values
				191	with magnitudes less than 1e-308 are flushed to zero when they
				192	need not be.
				193
				194	* Rounding following truncation could be a bit better.
				195
				196	* Identity of NaNs is not preserved.
				197
				198	See comments in the code for more details.
				199	*/
				200	static void convert_f80le_to_f64le ( /IN/UChar* f80, /OUT/UChar* f64 )
				201	{
				202	Bool isInf;
				203	Int bexp;
				204	UChar sign;
				205
				206	sign = (f80[9] >> 7) & 1;
				207	bexp = (((UInt)f80[9]) << 8) \| (UInt)f80[8];
				208	bexp &= 0x7FFF;
				209
				210	/* If the exponent is zero, either we have a zero or a denormal.
				211	But an extended precision denormal becomes a double precision
				212	zero, so in either case, just produce the appropriately signed
				213	zero. */
				214	if (bexp == 0) {
				215	f64[7] = sign << 7;
				216	f64[6] = f64[5] = f64[4] = f64[3] = f64[2] = f64[1] = f64[0] = 0;
				217	return;
				218	}
				219
				220	/* If the exponent is 7FFF, this is either an Infinity, a SNaN or
				221	QNaN, as determined by examining bits 62:0, thus:
				222	0 ... 0 Inf
				223	0X ... X SNaN
				224	1X ... X QNaN
				225	where at least one of the Xs is not zero.
				226	*/
				227	if (bexp == 0x7FFF) {
				228	isInf = (f80[7] & 0x7F) == 0
				229	&& f80[6] == 0 && f80[5] == 0 && f80[4] == 0
				230	&& f80[3] == 0 && f80[2] == 0 && f80[1] == 0 && f80[0] == 0;
				231	if (isInf) {
				232	if (0 == (f80[7] & 0x80))
				233	goto wierd_NaN;
				234	/* Produce an appropriately signed infinity:
				235	S 1--1 (11) 0--0 (52)
				236	*/
				237	f64[7] = (sign << 7) \| 0x7F;
				238	f64[6] = 0xF0;
				239	f64[5] = f64[4] = f64[3] = f64[2] = f64[1] = f64[0] = 0;
				240	return;
				241	}
				242	/* So it's either a QNaN or SNaN. Distinguish by considering
				243	bit 62. Note, this destroys all the trailing bits
				244	(identity?) of the NaN. IEEE754 doesn't require preserving
				245	these (it only requires that there be one QNaN value and one
				246	SNaN value), but x87 does seem to have some ability to
				247	preserve them. Anyway, here, the NaN's identity is
				248	destroyed. Could be improved. */
				249	if (f80[8] & 0x40) {
				250	/* QNaN. Make a QNaN:
				251	S 1--1 (11) 1 1--1 (51)
				252	*/
				253	f64[7] = (sign << 7) \| 0x7F;
				254	f64[6] = 0xFF;
				255	f64[5] = f64[4] = f64[3] = f64[2] = f64[1] = f64[0] = 0xFF;
				256	} else {
				257	/* SNaN. Make a SNaN:
				258	S 1--1 (11) 0 1--1 (51)
				259	*/
				260	f64[7] = (sign << 7) \| 0x7F;
				261	f64[6] = 0xF7;
				262	f64[5] = f64[4] = f64[3] = f64[2] = f64[1] = f64[0] = 0xFF;
				263	}
				264	return;
				265	}
				266
				267	/* If it's not a Zero, NaN or Inf, and the integer part (bit 62) is
				268	zero, the x87 FPU appears to consider the number denormalised
				269	and converts it to a QNaN. */
				270	if (0 == (f80[7] & 0x80)) {
				271	wierd_NaN:
				272	/* Strange hardware QNaN:
				273	S 1--1 (11) 1 0--0 (51)
				274	*/
				275	/* On a PIII, these QNaNs always appear with sign==1. I have
				276	no idea why. */
				277	f64[7] = (1 /sign/ << 7) \| 0x7F;
				278	f64[6] = 0xF8;
				279	f64[5] = f64[4] = f64[3] = f64[2] = f64[1] = f64[0] = 0;
				280	return;
				281	}
				282
				283	/* It's not a zero, denormal, infinity or nan. So it must be a
				284	normalised number. Rebias the exponent and consider. */
				285	bexp -= (16383 - 1023);
				286	if (bexp >= 0x7FF) {
				287	/* It's too big for a double. Construct an infinity. */
				288	f64[7] = (sign << 7) \| 0x7F;
				289	f64[6] = 0xF0;
				290	f64[5] = f64[4] = f64[3] = f64[2] = f64[1] = f64[0] = 0;
				291	return;
				292	}
				293
				294	if (bexp < 0) {
				295	/* It's too small for a double. Construct a zero. Note, this
				296	is a kludge since we could conceivably create a
				297	denormalised number for bexp in -1 to -51, but we don't
				298	bother. This means the conversion flushes values
				299	approximately in the range 1e-309 to 1e-324 ish to zero
				300	when it doesn't actually need to. This could be
				301	improved. */
				302	f64[7] = sign << 7;
				303	f64[6] = f64[5] = f64[4] = f64[3] = f64[2] = f64[1] = f64[0] = 0;
				304	return;
				305	}
				306
				307	/* Ok, it's a normalised number which is representable as a double.
				308	Copy the exponent and mantissa into place. */
				309	/*
				310	for (i = 0; i < 52; i++)
				311	write_bit_array ( f64,
				312	i,
				313	read_bit_array ( f80, i+11 ) );
				314	*/
				315	f64[0] = (f80[1] >> 3) \| (f80[2] << 5);
				316	f64[1] = (f80[2] >> 3) \| (f80[3] << 5);
				317	f64[2] = (f80[3] >> 3) \| (f80[4] << 5);
				318	f64[3] = (f80[4] >> 3) \| (f80[5] << 5);
				319	f64[4] = (f80[5] >> 3) \| (f80[6] << 5);
				320	f64[5] = (f80[6] >> 3) \| (f80[7] << 5);
				321
				322	f64[6] = ((bexp << 4) & 0xF0) \| ((f80[7] >> 3) & 0x0F);
				323
				324	f64[7] = (sign << 7) \| ((bexp >> 4) & 0x7F);
				325
				326	/* Now consider any rounding that needs to happen as a result of
				327	truncating the mantissa. */
				328	if (f80[1] & 4) /* read_bit_array(f80, 10) == 1) */ {
				329	/* Round upwards. This is a kludge. Once in every 64k
				330	roundings (statistically) the bottom two bytes are both 0xFF
				331	and so we don't round at all. Could be improved. */
				332	if (f64[0] != 0xFF) {
				333	f64[0]++;
				334	}
				335	else
				336	if (f64[0] == 0xFF && f64[1] != 0xFF) {
				337	f64[0] = 0;
				338	f64[1]++;
				339	}
				340	/* else we don't round, but we should. */
				341	}
				342	}
				343
sewardj	07e0962	2004-09-06 20:39:20 +0000	[diff] [blame^]	344	#ifndef USED_AS_INCLUDE
sewardj	cbe8efa	2004-09-06 14:57:52 +0000	[diff] [blame]	345
				346	//////////////
				347
				348	static void show_f80 ( UChar* f80 )
				349	{
				350	Int i;
				351	printf("%d ", read_bit_array(f80, 79));
				352
				353	for (i = 78; i >= 64; i--)
				354	printf("%d", read_bit_array(f80, i));
				355
				356	printf(" %d ", read_bit_array(f80, 63));
				357
				358	for (i = 62; i >= 0; i--)
				359	printf("%d", read_bit_array(f80, i));
				360	}
				361
				362	static void show_f64le ( UChar* f64 )
				363	{
				364	Int i;
				365	printf("%d ", read_bit_array(f64, 63));
				366
				367	for (i = 62; i >= 52; i--)
				368	printf("%d", read_bit_array(f64, i));
				369
				370	printf(" ");
				371	for (i = 51; i >= 0; i--)
				372	printf("%d", read_bit_array(f64, i));
				373	}
				374
				375	//////////////
				376
				377
				378	/* Convert f80 to a 64-bit IEEE double using both the hardware and the
				379	soft version, and compare the results. If they differ, print
				380	details and return 1. If they are identical, return 0.
				381	*/
				382	int do_80_to_64_test ( Int test_no, UChar* f80, UChar* f64h, UChar* f64s)
				383	{
				384	Char buf64s[100], buf64h[100];
				385	Bool same;
				386	Int k;
				387	convert_f80le_to_f64le_HW(f80, f64h);
				388	convert_f80le_to_f64le(f80, f64s);
				389	same = True;
				390	for (k = 0; k < 8; k++) {
				391	if (f64s[k] != f64h[k]) {
				392	same = False; break;
				393	}
				394	}
				395	/* bitwise identical */
				396	if (same)
				397	return 0;
				398
				399	sprintf(buf64s, "%.16e", (double)f64s);
				400	sprintf(buf64h, "%.16e", (double)f64h);
				401
				402	/* Not bitwise identical, but pretty darn close */
				403	if (0 == strcmp(buf64s, buf64h))
				404	return 0;
				405
				406	printf("\n");
				407	printf("f80: "); show_f80(f80); printf("\n");
				408	printf("f64h: "); show_f64le(f64h); printf("\n");
				409	printf("f64s: "); show_f64le(f64s); printf("\n");
				410
				411	printf("[test %d] %.16Le -> (hw %s, sw %s)\n",
				412	test_no, (long double)f80,
				413	buf64h, buf64s );
				414
				415	return 1;
				416	}
				417
				418
				419	/* Convert an IEEE 64-bit double to a x87 extended double (80 bit)
				420	using both the hardware and the soft version, and compare the
				421	results. If they differ, print details and return 1. If they are
				422	identical, return 0.
				423	*/
				424	int do_64_to_80_test ( Int test_no, UChar* f64, UChar* f80h, UChar* f80s)
				425	{
				426	Char buf80s[100], buf80h[100];
				427	Bool same;
				428	Int k;
				429	convert_f64le_to_f80le_HW(f64, f80h);
				430	convert_f64le_to_f80le(f64, f80s);
				431	same = True;
				432	for (k = 0; k < 10; k++) {
				433	if (f80s[k] != f80h[k]) {
				434	same = False; break;
				435	}
				436	}
				437	/* bitwise identical */
				438	if (same)
				439	return 0;
				440
				441	sprintf(buf80s, "%.20Le", (long double)f80s);
				442	sprintf(buf80h, "%.20Le", (long double)f80h);
				443
				444	/* Not bitwise identical, but pretty darn close */
				445	if (0 == strcmp(buf80s, buf80h))
				446	return 0;
				447
				448	printf("\n");
				449	printf("f64: "); show_f64le(f64); printf("\n");
				450	printf("f80h: "); show_f80(f80h); printf("\n");
				451	printf("f80s: "); show_f80(f80s); printf("\n");
				452
				453	printf("[test %d] %.16e -> (hw %s, sw %s)\n",
				454	test_no, (double)f64,
				455	buf80h, buf80s );
				456
				457	return 1;
				458	}
				459
				460
				461
				462	void do_80_to_64_tests ( void )
				463	{
				464	UInt b9,b8,b7,i, j;
				465	Int fails=0, tests=0;
				466	UChar* f64h = malloc(8);
				467	UChar* f64s = malloc(8);
				468	UChar* f80 = malloc(10);
				469	int STEP = 1;
				470
				471	srandom(4343);
				472
				473	/* Ten million random bit patterns */
				474	for (i = 0; i < 10000000; i++) {
				475	tests++;
				476	for (j = 0; j < 10; j++)
				477	f80[j] = (random() >> 7) & 255;
				478
				479	fails += do_80_to_64_test(tests, f80, f64h, f64s);
				480	}
				481
				482	/* 2^24 numbers in which the first 24 bits are tested exhaustively
				483	-- this covers the sign, exponent and leading part of the
				484	mantissa. */
				485	for (b9 = 0; b9 < 256; b9 += STEP) {
				486	for (b8 = 0; b8 < 256; b8 += STEP) {
				487	for (b7 = 0; b7 < 256; b7 += STEP) {
				488	tests++;
				489	for (i = 0; i < 10; i++)
				490	f80[i] = 0;
				491	for (i = 0; i < 8; i++)
				492	f64h[i] = f64s[i] = 0;
				493	f80[9] = b9;
				494	f80[8] = b8;
				495	f80[7] = b7;
				496
				497	fails += do_80_to_64_test(tests, f80, f64h, f64s);
				498	}}}
				499
				500	printf("\n80 -> 64: %d tests, %d fails\n\n", tests, fails);
				501	}
				502
				503
				504	void do_64_to_80_tests ( void )
				505	{
				506	UInt b7,b6,b5,i, j;
				507	Int fails=0, tests=0;
				508	UChar* f80h = malloc(10);
				509	UChar* f80s = malloc(10);
				510	UChar* f64 = malloc(8);
				511	int STEP = 1;
				512
				513	srandom(2323);
				514
				515	/* Ten million random bit patterns */
				516	for (i = 0; i < 10000000; i++) {
				517	tests++;
				518	for (j = 0; j < 8; j++)
				519	f64[j] = (random() >> 13) & 255;
				520
				521	fails += do_64_to_80_test(tests, f64, f80h, f80s);
				522	}
				523
				524	/* 2^24 numbers in which the first 24 bits are tested exhaustively
				525	-- this covers the sign, exponent and leading part of the
				526	mantissa. */
				527	for (b7 = 0; b7 < 256; b7 += STEP) {
				528	for (b6 = 0; b6 < 256; b6 += STEP) {
				529	for (b5 = 0; b5 < 256; b5 += STEP) {
				530	tests++;
				531	for (i = 0; i < 8; i++)
				532	f64[i] = 0;
				533	for (i = 0; i < 10; i++)
				534	f80h[i] = f80s[i] = 0;
				535	f64[7] = b7;
				536	f64[6] = b6;
				537	f64[5] = b5;
				538
				539	fails += do_64_to_80_test(tests, f64, f80h, f80s);
				540	}}}
				541
				542	printf("\n64 -> 80: %d tests, %d fails\n\n", tests, fails);
				543	}
				544
				545
				546	int main ( void )
				547	{
				548	do_80_to_64_tests();
				549	do_64_to_80_tests();
				550	return 0;
				551	}
				552
sewardj	07e0962	2004-09-06 20:39:20 +0000	[diff] [blame^]	553	#endif /* ndef USED_AS_INCLUDE */