Blame - arch/i386/math-emu/poly_sin.c - kernel/msm

blob: a36313fb06f14988f3dbed1faf1ab75ce29f47f1 [file] [log] [blame]

Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1	/*---------------------------------------------------------------------------+
				2	\| poly_sin.c \|
				3	\| \|
				4	\| Computation of an approximation of the sin function and the cosine \|
				5	\| function by a polynomial. \|
				6	\| \|
				7	\| Copyright (C) 1992,1993,1994,1997,1999 \|
				8	\| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia \|
				9	\| E-mail billm@melbpc.org.au \|
				10	\| \|
				11	\| \|
				12	+---------------------------------------------------------------------------*/
				13
				14
				15	#include "exception.h"
				16	#include "reg_constant.h"
				17	#include "fpu_emu.h"
				18	#include "fpu_system.h"
				19	#include "control_w.h"
				20	#include "poly.h"
				21
				22
				23	#define N_COEFF_P 4
				24	#define N_COEFF_N 4
				25
				26	static const unsigned long long pos_terms_l[N_COEFF_P] =
				27	{
				28	0xaaaaaaaaaaaaaaabLL,
				29	0x00d00d00d00cf906LL,
				30	0x000006b99159a8bbLL,
				31	0x000000000d7392e6LL
				32	};
				33
				34	static const unsigned long long neg_terms_l[N_COEFF_N] =
				35	{
				36	0x2222222222222167LL,
				37	0x0002e3bc74aab624LL,
				38	0x0000000b09229062LL,
				39	0x00000000000c7973LL
				40	};
				41
				42
				43
				44	#define N_COEFF_PH 4
				45	#define N_COEFF_NH 4
				46	static const unsigned long long pos_terms_h[N_COEFF_PH] =
				47	{
				48	0x0000000000000000LL,
				49	0x05b05b05b05b0406LL,
				50	0x000049f93edd91a9LL,
				51	0x00000000c9c9ed62LL
				52	};
				53
				54	static const unsigned long long neg_terms_h[N_COEFF_NH] =
				55	{
				56	0xaaaaaaaaaaaaaa98LL,
				57	0x001a01a01a019064LL,
				58	0x0000008f76c68a77LL,
				59	0x0000000000d58f5eLL
				60	};
				61
				62
				63	/*--- poly_sine() -----------------------------------------------------------+
				64	\| \|
				65	+---------------------------------------------------------------------------*/
				66	void poly_sine(FPU_REG *st0_ptr)
				67	{
				68	int exponent, echange;
				69	Xsig accumulator, argSqrd, argTo4;
				70	unsigned long fix_up, adj;
				71	unsigned long long fixed_arg;
				72	FPU_REG result;
				73
				74	exponent = exponent(st0_ptr);
				75
				76	accumulator.lsw = accumulator.midw = accumulator.msw = 0;
				77
				78	/* Split into two ranges, for arguments below and above 1.0 */
				79	/* The boundary between upper and lower is approx 0.88309101259 */
				80	if ( (exponent < -1) \|\| ((exponent == -1) && (st0_ptr->sigh <= 0xe21240aa)) )
				81	{
				82	/* The argument is <= 0.88309101259 */
				83
				84	argSqrd.msw = st0_ptr->sigh; argSqrd.midw = st0_ptr->sigl; argSqrd.lsw = 0;
				85	mul64_Xsig(&argSqrd, &significand(st0_ptr));
				86	shr_Xsig(&argSqrd, 2*(-1-exponent));
				87	argTo4.msw = argSqrd.msw; argTo4.midw = argSqrd.midw;
				88	argTo4.lsw = argSqrd.lsw;
				89	mul_Xsig_Xsig(&argTo4, &argTo4);
				90
				91	polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_l,
				92	N_COEFF_N-1);
				93	mul_Xsig_Xsig(&accumulator, &argSqrd);
				94	negate_Xsig(&accumulator);
				95
				96	polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_l,
				97	N_COEFF_P-1);
				98
				99	shr_Xsig(&accumulator, 2); /* Divide by four */
				100	accumulator.msw \|= 0x80000000; /* Add 1.0 */
				101
				102	mul64_Xsig(&accumulator, &significand(st0_ptr));
				103	mul64_Xsig(&accumulator, &significand(st0_ptr));
				104	mul64_Xsig(&accumulator, &significand(st0_ptr));
				105
				106	/* Divide by four, FPU_REG compatible, etc */
				107	exponent = 3*exponent;
				108
				109	/* The minimum exponent difference is 3 */
				110	shr_Xsig(&accumulator, exponent(st0_ptr) - exponent);
				111
				112	negate_Xsig(&accumulator);
				113	XSIG_LL(accumulator) += significand(st0_ptr);
				114
				115	echange = round_Xsig(&accumulator);
				116
				117	setexponentpos(&result, exponent(st0_ptr) + echange);
				118	}
				119	else
				120	{
				121	/* The argument is > 0.88309101259 */
				122	/* We use sin(st(0)) = cos(pi/2-st(0)) */
				123
				124	fixed_arg = significand(st0_ptr);
				125
				126	if ( exponent == 0 )
				127	{
				128	/* The argument is >= 1.0 */
				129
				130	/* Put the binary point at the left. */
				131	fixed_arg <<= 1;
				132	}
				133	/* pi/2 in hex is: 1.921fb54442d18469 898CC51701B839A2 52049C1 */
				134	fixed_arg = 0x921fb54442d18469LL - fixed_arg;
				135	/* There is a special case which arises due to rounding, to fix here. */
				136	if ( fixed_arg == 0xffffffffffffffffLL )
				137	fixed_arg = 0;
				138
				139	XSIG_LL(argSqrd) = fixed_arg; argSqrd.lsw = 0;
				140	mul64_Xsig(&argSqrd, &fixed_arg);
				141
				142	XSIG_LL(argTo4) = XSIG_LL(argSqrd); argTo4.lsw = argSqrd.lsw;
				143	mul_Xsig_Xsig(&argTo4, &argTo4);
				144
				145	polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_h,
				146	N_COEFF_NH-1);
				147	mul_Xsig_Xsig(&accumulator, &argSqrd);
				148	negate_Xsig(&accumulator);
				149
				150	polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_h,
				151	N_COEFF_PH-1);
				152	negate_Xsig(&accumulator);
				153
				154	mul64_Xsig(&accumulator, &fixed_arg);
				155	mul64_Xsig(&accumulator, &fixed_arg);
				156
				157	shr_Xsig(&accumulator, 3);
				158	negate_Xsig(&accumulator);
				159
				160	add_Xsig_Xsig(&accumulator, &argSqrd);
				161
				162	shr_Xsig(&accumulator, 1);
				163
				164	accumulator.lsw \|= 1; /* A zero accumulator here would cause problems */
				165	negate_Xsig(&accumulator);
				166
				167	/* The basic computation is complete. Now fix the answer to
				168	compensate for the error due to the approximation used for
				169	pi/2
				170	*/
				171
				172	/* This has an exponent of -65 */
				173	fix_up = 0x898cc517;
				174	/* The fix-up needs to be improved for larger args */
				175	if ( argSqrd.msw & 0xffc00000 )
				176	{
				177	/* Get about 32 bit precision in these: */
				178	fix_up -= mul_32_32(0x898cc517, argSqrd.msw) / 6;
				179	}
				180	fix_up = mul_32_32(fix_up, LL_MSW(fixed_arg));
				181
				182	adj = accumulator.lsw; /* temp save */
				183	accumulator.lsw -= fix_up;
				184	if ( accumulator.lsw > adj )
				185	XSIG_LL(accumulator) --;
				186
				187	echange = round_Xsig(&accumulator);
				188
				189	setexponentpos(&result, echange - 1);
				190	}
				191
				192	significand(&result) = XSIG_LL(accumulator);
				193	setsign(&result, getsign(st0_ptr));
				194	FPU_copy_to_reg0(&result, TAG_Valid);
				195
				196	#ifdef PARANOID
				197	if ( (exponent(&result) >= 0)
				198	&& (significand(&result) > 0x8000000000000000LL) )
				199	{
				200	EXCEPTION(EX_INTERNAL\|0x150);
				201	}
				202	#endif /* PARANOID */
				203
				204	}
				205
				206
				207
				208	/*--- poly_cos() ------------------------------------------------------------+
				209	\| \|
				210	+---------------------------------------------------------------------------*/
				211	void poly_cos(FPU_REG *st0_ptr)
				212	{
				213	FPU_REG result;
				214	long int exponent, exp2, echange;
				215	Xsig accumulator, argSqrd, fix_up, argTo4;
				216	unsigned long long fixed_arg;
				217
				218	#ifdef PARANOID
				219	if ( (exponent(st0_ptr) > 0)
				220	\|\| ((exponent(st0_ptr) == 0)
				221	&& (significand(st0_ptr) > 0xc90fdaa22168c234LL)) )
				222	{
				223	EXCEPTION(EX_Invalid);
				224	FPU_copy_to_reg0(&CONST_QNaN, TAG_Special);
				225	return;
				226	}
				227	#endif /* PARANOID */
				228
				229	exponent = exponent(st0_ptr);
				230
				231	accumulator.lsw = accumulator.midw = accumulator.msw = 0;
				232
				233	if ( (exponent < -1) \|\| ((exponent == -1) && (st0_ptr->sigh <= 0xb00d6f54)) )
				234	{
				235	/* arg is < 0.687705 */
				236
				237	argSqrd.msw = st0_ptr->sigh; argSqrd.midw = st0_ptr->sigl;
				238	argSqrd.lsw = 0;
				239	mul64_Xsig(&argSqrd, &significand(st0_ptr));
				240
				241	if ( exponent < -1 )
				242	{
				243	/* shift the argument right by the required places */
				244	shr_Xsig(&argSqrd, 2*(-1-exponent));
				245	}
				246
				247	argTo4.msw = argSqrd.msw; argTo4.midw = argSqrd.midw;
				248	argTo4.lsw = argSqrd.lsw;
				249	mul_Xsig_Xsig(&argTo4, &argTo4);
				250
				251	polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_h,
				252	N_COEFF_NH-1);
				253	mul_Xsig_Xsig(&accumulator, &argSqrd);
				254	negate_Xsig(&accumulator);
				255
				256	polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_h,
				257	N_COEFF_PH-1);
				258	negate_Xsig(&accumulator);
				259
				260	mul64_Xsig(&accumulator, &significand(st0_ptr));
				261	mul64_Xsig(&accumulator, &significand(st0_ptr));
				262	shr_Xsig(&accumulator, -2*(1+exponent));
				263
				264	shr_Xsig(&accumulator, 3);
				265	negate_Xsig(&accumulator);
				266
				267	add_Xsig_Xsig(&accumulator, &argSqrd);
				268
				269	shr_Xsig(&accumulator, 1);
				270
				271	/* It doesn't matter if accumulator is all zero here, the
				272	following code will work ok */
				273	negate_Xsig(&accumulator);
				274
				275	if ( accumulator.lsw & 0x80000000 )
				276	XSIG_LL(accumulator) ++;
				277	if ( accumulator.msw == 0 )
				278	{
				279	/* The result is 1.0 */
				280	FPU_copy_to_reg0(&CONST_1, TAG_Valid);
				281	return;
				282	}
				283	else
				284	{
				285	significand(&result) = XSIG_LL(accumulator);
				286
				287	/* will be a valid positive nr with expon = -1 */
				288	setexponentpos(&result, -1);
				289	}
				290	}
				291	else
				292	{
				293	fixed_arg = significand(st0_ptr);
				294
				295	if ( exponent == 0 )
				296	{
				297	/* The argument is >= 1.0 */
				298
				299	/* Put the binary point at the left. */
				300	fixed_arg <<= 1;
				301	}
				302	/* pi/2 in hex is: 1.921fb54442d18469 898CC51701B839A2 52049C1 */
				303	fixed_arg = 0x921fb54442d18469LL - fixed_arg;
				304	/* There is a special case which arises due to rounding, to fix here. */
				305	if ( fixed_arg == 0xffffffffffffffffLL )
				306	fixed_arg = 0;
				307
				308	exponent = -1;
				309	exp2 = -1;
				310
				311	/* A shift is needed here only for a narrow range of arguments,
				312	i.e. for fixed_arg approx 2^-32, but we pick up more... */
				313	if ( !(LL_MSW(fixed_arg) & 0xffff0000) )
				314	{
				315	fixed_arg <<= 16;
				316	exponent -= 16;
				317	exp2 -= 16;
				318	}
				319
				320	XSIG_LL(argSqrd) = fixed_arg; argSqrd.lsw = 0;
				321	mul64_Xsig(&argSqrd, &fixed_arg);
				322
				323	if ( exponent < -1 )
				324	{
				325	/* shift the argument right by the required places */
				326	shr_Xsig(&argSqrd, 2*(-1-exponent));
				327	}
				328
				329	argTo4.msw = argSqrd.msw; argTo4.midw = argSqrd.midw;
				330	argTo4.lsw = argSqrd.lsw;
				331	mul_Xsig_Xsig(&argTo4, &argTo4);
				332
				333	polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_l,
				334	N_COEFF_N-1);
				335	mul_Xsig_Xsig(&accumulator, &argSqrd);
				336	negate_Xsig(&accumulator);
				337
				338	polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_l,
				339	N_COEFF_P-1);
				340
				341	shr_Xsig(&accumulator, 2); /* Divide by four */
				342	accumulator.msw \|= 0x80000000; /* Add 1.0 */
				343
				344	mul64_Xsig(&accumulator, &fixed_arg);
				345	mul64_Xsig(&accumulator, &fixed_arg);
				346	mul64_Xsig(&accumulator, &fixed_arg);
				347
				348	/* Divide by four, FPU_REG compatible, etc */
				349	exponent = 3*exponent;
				350
				351	/* The minimum exponent difference is 3 */
				352	shr_Xsig(&accumulator, exp2 - exponent);
				353
				354	negate_Xsig(&accumulator);
				355	XSIG_LL(accumulator) += fixed_arg;
				356
				357	/* The basic computation is complete. Now fix the answer to
				358	compensate for the error due to the approximation used for
				359	pi/2
				360	*/
				361
				362	/* This has an exponent of -65 */
				363	XSIG_LL(fix_up) = 0x898cc51701b839a2ll;
				364	fix_up.lsw = 0;
				365
				366	/* The fix-up needs to be improved for larger args */
				367	if ( argSqrd.msw & 0xffc00000 )
				368	{
				369	/* Get about 32 bit precision in these: */
				370	fix_up.msw -= mul_32_32(0x898cc517, argSqrd.msw) / 2;
				371	fix_up.msw += mul_32_32(0x898cc517, argTo4.msw) / 24;
				372	}
				373
				374	exp2 += norm_Xsig(&accumulator);
				375	shr_Xsig(&accumulator, 1); /* Prevent overflow */
				376	exp2++;
				377	shr_Xsig(&fix_up, 65 + exp2);
				378
				379	add_Xsig_Xsig(&accumulator, &fix_up);
				380
				381	echange = round_Xsig(&accumulator);
				382
				383	setexponentpos(&result, exp2 + echange);
				384	significand(&result) = XSIG_LL(accumulator);
				385	}
				386
				387	FPU_copy_to_reg0(&result, TAG_Valid);
				388
				389	#ifdef PARANOID
				390	if ( (exponent(&result) >= 0)
				391	&& (significand(&result) > 0x8000000000000000LL) )
				392	{
				393	EXCEPTION(EX_INTERNAL\|0x151);
				394	}
				395	#endif /* PARANOID */
				396
				397	}