Blame - arch/x86/math-emu/poly_sin.c - kernel/msm-4.9

blob: b862039c728e628cd4257afecb194f3b2703699f [file] [log] [blame]

/*---------------------------------------------------------------------------+
 |  poly_sin.c                                                               |
 |                                                                           |
 |  Computation of an approximation of the sin function and the cosine       |
 |  function by a polynomial.                                                |
 |                                                                           |
 | Copyright (C) 1992,1993,1994,1997,1999                                    |
 |                  W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia |
 |                  E-mail   billm@melbpc.org.au                             |
 |                                                                           |
 |                                                                           |
 +---------------------------------------------------------------------------*/
				13
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	14	#include "exception.h"
				15	#include "reg_constant.h"
				16	#include "fpu_emu.h"
				17	#include "fpu_system.h"
				18	#include "control_w.h"
				19	#include "poly.h"
				20
/*
 * Coefficient tables shared by the small-argument branch of poly_sine()
 * and the reduced-argument branch of poly_cos().
 *
 * In both users, neg_terms_l is evaluated first (as a polynomial in the
 * fourth power of the argument), the sum is multiplied by the squared
 * argument and negated, and pos_terms_l is then accumulated on top.
 */
#define N_COEFF_P 4
#define N_COEFF_N 4

/* Coefficients of the terms that are added. */
static const unsigned long long pos_terms_l[N_COEFF_P] = {
	0xaaaaaaaaaaaaaaabULL,
	0x00d00d00d00cf906ULL,
	0x000006b99159a8bbULL,
	0x000000000d7392e6ULL
};

/* Coefficients of the terms that are subtracted. */
static const unsigned long long neg_terms_l[N_COEFF_N] = {
	0x2222222222222167ULL,
	0x0002e3bc74aab624ULL,
	0x0000000b09229062ULL,
	0x00000000000c7973ULL
};
				37
/*
 * Coefficient tables shared by the reduced-argument branch of poly_sine()
 * and the small-argument branch of poly_cos().
 *
 * In both users, neg_terms_h is evaluated first (as a polynomial in the
 * fourth power of the argument), the sum is multiplied by the squared
 * argument and negated, pos_terms_h is accumulated on top, and the total
 * is negated again.
 */
#define N_COEFF_PH 4
#define N_COEFF_NH 4

/* Second table passed to polynomial_Xsig(); its leading entry is zero. */
static const unsigned long long pos_terms_h[N_COEFF_PH] = {
	0x0000000000000000ULL,
	0x05b05b05b05b0406ULL,
	0x000049f93edd91a9ULL,
	0x00000000c9c9ed62ULL
};

/* First table passed to polynomial_Xsig(). */
static const unsigned long long neg_terms_h[N_COEFF_NH] = {
	0xaaaaaaaaaaaaaa98ULL,
	0x001a01a01a019064ULL,
	0x0000008f76c68a77ULL,
	0x0000000000d58f5eULL
};
				53
/*--- poly_sine() -----------------------------------------------------------+
 |                                                                           |
 +---------------------------------------------------------------------------*/
Ingo Molnar	e8d591d	2008-01-30 13:30:12 +0100	[diff] [blame]	57	void poly_sine(FPU_REG *st0_ptr)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	58	{
Ingo Molnar	3d0d14f	2008-01-30 13:30:11 +0100	[diff] [blame]	59	int exponent, echange;
				60	Xsig accumulator, argSqrd, argTo4;
				61	unsigned long fix_up, adj;
				62	unsigned long long fixed_arg;
				63	FPU_REG result;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	64
Ingo Molnar	3d0d14f	2008-01-30 13:30:11 +0100	[diff] [blame]	65	exponent = exponent(st0_ptr);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	66
Ingo Molnar	3d0d14f	2008-01-30 13:30:11 +0100	[diff] [blame]	67	accumulator.lsw = accumulator.midw = accumulator.msw = 0;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	68
Ingo Molnar	3d0d14f	2008-01-30 13:30:11 +0100	[diff] [blame]	69	/* Split into two ranges, for arguments below and above 1.0 */
				70	/* The boundary between upper and lower is approx 0.88309101259 */
				71	if ((exponent < -1)
				72	\|\| ((exponent == -1) && (st0_ptr->sigh <= 0xe21240aa))) {
				73	/* The argument is <= 0.88309101259 */
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	74
Ingo Molnar	3d0d14f	2008-01-30 13:30:11 +0100	[diff] [blame]	75	argSqrd.msw = st0_ptr->sigh;
				76	argSqrd.midw = st0_ptr->sigl;
				77	argSqrd.lsw = 0;
				78	mul64_Xsig(&argSqrd, &significand(st0_ptr));
				79	shr_Xsig(&argSqrd, 2 * (-1 - exponent));
				80	argTo4.msw = argSqrd.msw;
				81	argTo4.midw = argSqrd.midw;
				82	argTo4.lsw = argSqrd.lsw;
				83	mul_Xsig_Xsig(&argTo4, &argTo4);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	84
Ingo Molnar	3d0d14f	2008-01-30 13:30:11 +0100	[diff] [blame]	85	polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_l,
				86	N_COEFF_N - 1);
				87	mul_Xsig_Xsig(&accumulator, &argSqrd);
				88	negate_Xsig(&accumulator);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	89
Ingo Molnar	3d0d14f	2008-01-30 13:30:11 +0100	[diff] [blame]	90	polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_l,
				91	N_COEFF_P - 1);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	92
Ingo Molnar	3d0d14f	2008-01-30 13:30:11 +0100	[diff] [blame]	93	shr_Xsig(&accumulator, 2); /* Divide by four */
				94	accumulator.msw \|= 0x80000000; /* Add 1.0 */
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	95
Ingo Molnar	3d0d14f	2008-01-30 13:30:11 +0100	[diff] [blame]	96	mul64_Xsig(&accumulator, &significand(st0_ptr));
				97	mul64_Xsig(&accumulator, &significand(st0_ptr));
				98	mul64_Xsig(&accumulator, &significand(st0_ptr));
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	99
Ingo Molnar	3d0d14f	2008-01-30 13:30:11 +0100	[diff] [blame]	100	/* Divide by four, FPU_REG compatible, etc */
				101	exponent = 3 * exponent;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	102
Ingo Molnar	3d0d14f	2008-01-30 13:30:11 +0100	[diff] [blame]	103	/* The minimum exponent difference is 3 */
				104	shr_Xsig(&accumulator, exponent(st0_ptr) - exponent);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	105
Ingo Molnar	3d0d14f	2008-01-30 13:30:11 +0100	[diff] [blame]	106	negate_Xsig(&accumulator);
				107	XSIG_LL(accumulator) += significand(st0_ptr);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	108
Ingo Molnar	3d0d14f	2008-01-30 13:30:11 +0100	[diff] [blame]	109	echange = round_Xsig(&accumulator);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	110
Ingo Molnar	3d0d14f	2008-01-30 13:30:11 +0100	[diff] [blame]	111	setexponentpos(&result, exponent(st0_ptr) + echange);
				112	} else {
				113	/* The argument is > 0.88309101259 */
				114	/* We use sin(st(0)) = cos(pi/2-st(0)) */
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	115
Ingo Molnar	3d0d14f	2008-01-30 13:30:11 +0100	[diff] [blame]	116	fixed_arg = significand(st0_ptr);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	117
Ingo Molnar	3d0d14f	2008-01-30 13:30:11 +0100	[diff] [blame]	118	if (exponent == 0) {
				119	/* The argument is >= 1.0 */
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	120
Ingo Molnar	3d0d14f	2008-01-30 13:30:11 +0100	[diff] [blame]	121	/* Put the binary point at the left. */
				122	fixed_arg <<= 1;
				123	}
				124	/* pi/2 in hex is: 1.921fb54442d18469 898CC51701B839A2 52049C1 */
				125	fixed_arg = 0x921fb54442d18469LL - fixed_arg;
				126	/* There is a special case which arises due to rounding, to fix here. */
				127	if (fixed_arg == 0xffffffffffffffffLL)
				128	fixed_arg = 0;
				129
				130	XSIG_LL(argSqrd) = fixed_arg;
				131	argSqrd.lsw = 0;
				132	mul64_Xsig(&argSqrd, &fixed_arg);
				133
				134	XSIG_LL(argTo4) = XSIG_LL(argSqrd);
				135	argTo4.lsw = argSqrd.lsw;
				136	mul_Xsig_Xsig(&argTo4, &argTo4);
				137
				138	polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_h,
				139	N_COEFF_NH - 1);
				140	mul_Xsig_Xsig(&accumulator, &argSqrd);
				141	negate_Xsig(&accumulator);
				142
				143	polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_h,
				144	N_COEFF_PH - 1);
				145	negate_Xsig(&accumulator);
				146
				147	mul64_Xsig(&accumulator, &fixed_arg);
				148	mul64_Xsig(&accumulator, &fixed_arg);
				149
				150	shr_Xsig(&accumulator, 3);
				151	negate_Xsig(&accumulator);
				152
				153	add_Xsig_Xsig(&accumulator, &argSqrd);
				154
				155	shr_Xsig(&accumulator, 1);
				156
				157	accumulator.lsw \|= 1; /* A zero accumulator here would cause problems */
				158	negate_Xsig(&accumulator);
				159
				160	/* The basic computation is complete. Now fix the answer to
				161	compensate for the error due to the approximation used for
				162	pi/2
				163	*/
				164
				165	/* This has an exponent of -65 */
				166	fix_up = 0x898cc517;
				167	/* The fix-up needs to be improved for larger args */
				168	if (argSqrd.msw & 0xffc00000) {
				169	/* Get about 32 bit precision in these: */
				170	fix_up -= mul_32_32(0x898cc517, argSqrd.msw) / 6;
				171	}
				172	fix_up = mul_32_32(fix_up, LL_MSW(fixed_arg));
				173
				174	adj = accumulator.lsw; /* temp save */
				175	accumulator.lsw -= fix_up;
				176	if (accumulator.lsw > adj)
				177	XSIG_LL(accumulator)--;
				178
				179	echange = round_Xsig(&accumulator);
				180
				181	setexponentpos(&result, echange - 1);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	182	}
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	183
Ingo Molnar	3d0d14f	2008-01-30 13:30:11 +0100	[diff] [blame]	184	significand(&result) = XSIG_LL(accumulator);
				185	setsign(&result, getsign(st0_ptr));
				186	FPU_copy_to_reg0(&result, TAG_Valid);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	187
				188	#ifdef PARANOID
Ingo Molnar	3d0d14f	2008-01-30 13:30:11 +0100	[diff] [blame]	189	if ((exponent(&result) >= 0)
				190	&& (significand(&result) > 0x8000000000000000LL)) {
				191	EXCEPTION(EX_INTERNAL \| 0x150);
				192	}
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	193	#endif /* PARANOID */
				194
				195	}
				196
/*--- poly_cos() ------------------------------------------------------------+
 |                                                                           |
 +---------------------------------------------------------------------------*/
Ingo Molnar	e8d591d	2008-01-30 13:30:12 +0100	[diff] [blame]	200	void poly_cos(FPU_REG *st0_ptr)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	201	{
Ingo Molnar	3d0d14f	2008-01-30 13:30:11 +0100	[diff] [blame]	202	FPU_REG result;
				203	long int exponent, exp2, echange;
				204	Xsig accumulator, argSqrd, fix_up, argTo4;
				205	unsigned long long fixed_arg;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	206
				207	#ifdef PARANOID
Ingo Molnar	3d0d14f	2008-01-30 13:30:11 +0100	[diff] [blame]	208	if ((exponent(st0_ptr) > 0)
				209	\|\| ((exponent(st0_ptr) == 0)
				210	&& (significand(st0_ptr) > 0xc90fdaa22168c234LL))) {
				211	EXCEPTION(EX_Invalid);
				212	FPU_copy_to_reg0(&CONST_QNaN, TAG_Special);
				213	return;
				214	}
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	215	#endif /* PARANOID */
				216
Ingo Molnar	3d0d14f	2008-01-30 13:30:11 +0100	[diff] [blame]	217	exponent = exponent(st0_ptr);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	218
Ingo Molnar	3d0d14f	2008-01-30 13:30:11 +0100	[diff] [blame]	219	accumulator.lsw = accumulator.midw = accumulator.msw = 0;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	220
Ingo Molnar	3d0d14f	2008-01-30 13:30:11 +0100	[diff] [blame]	221	if ((exponent < -1)
				222	\|\| ((exponent == -1) && (st0_ptr->sigh <= 0xb00d6f54))) {
				223	/* arg is < 0.687705 */
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	224
Ingo Molnar	3d0d14f	2008-01-30 13:30:11 +0100	[diff] [blame]	225	argSqrd.msw = st0_ptr->sigh;
				226	argSqrd.midw = st0_ptr->sigl;
				227	argSqrd.lsw = 0;
				228	mul64_Xsig(&argSqrd, &significand(st0_ptr));
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	229
Ingo Molnar	3d0d14f	2008-01-30 13:30:11 +0100	[diff] [blame]	230	if (exponent < -1) {
				231	/* shift the argument right by the required places */
				232	shr_Xsig(&argSqrd, 2 * (-1 - exponent));
				233	}
				234
				235	argTo4.msw = argSqrd.msw;
				236	argTo4.midw = argSqrd.midw;
				237	argTo4.lsw = argSqrd.lsw;
				238	mul_Xsig_Xsig(&argTo4, &argTo4);
				239
				240	polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_h,
				241	N_COEFF_NH - 1);
				242	mul_Xsig_Xsig(&accumulator, &argSqrd);
				243	negate_Xsig(&accumulator);
				244
				245	polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_h,
				246	N_COEFF_PH - 1);
				247	negate_Xsig(&accumulator);
				248
				249	mul64_Xsig(&accumulator, &significand(st0_ptr));
				250	mul64_Xsig(&accumulator, &significand(st0_ptr));
				251	shr_Xsig(&accumulator, -2 * (1 + exponent));
				252
				253	shr_Xsig(&accumulator, 3);
				254	negate_Xsig(&accumulator);
				255
				256	add_Xsig_Xsig(&accumulator, &argSqrd);
				257
				258	shr_Xsig(&accumulator, 1);
				259
				260	/* It doesn't matter if accumulator is all zero here, the
				261	following code will work ok */
				262	negate_Xsig(&accumulator);
				263
				264	if (accumulator.lsw & 0x80000000)
				265	XSIG_LL(accumulator)++;
				266	if (accumulator.msw == 0) {
				267	/* The result is 1.0 */
				268	FPU_copy_to_reg0(&CONST_1, TAG_Valid);
				269	return;
				270	} else {
				271	significand(&result) = XSIG_LL(accumulator);
				272
				273	/* will be a valid positive nr with expon = -1 */
				274	setexponentpos(&result, -1);
				275	}
				276	} else {
				277	fixed_arg = significand(st0_ptr);
				278
				279	if (exponent == 0) {
				280	/* The argument is >= 1.0 */
				281
				282	/* Put the binary point at the left. */
				283	fixed_arg <<= 1;
				284	}
				285	/* pi/2 in hex is: 1.921fb54442d18469 898CC51701B839A2 52049C1 */
				286	fixed_arg = 0x921fb54442d18469LL - fixed_arg;
				287	/* There is a special case which arises due to rounding, to fix here. */
				288	if (fixed_arg == 0xffffffffffffffffLL)
				289	fixed_arg = 0;
				290
				291	exponent = -1;
				292	exp2 = -1;
				293
				294	/* A shift is needed here only for a narrow range of arguments,
				295	i.e. for fixed_arg approx 2^-32, but we pick up more... */
				296	if (!(LL_MSW(fixed_arg) & 0xffff0000)) {
				297	fixed_arg <<= 16;
				298	exponent -= 16;
				299	exp2 -= 16;
				300	}
				301
				302	XSIG_LL(argSqrd) = fixed_arg;
				303	argSqrd.lsw = 0;
				304	mul64_Xsig(&argSqrd, &fixed_arg);
				305
				306	if (exponent < -1) {
				307	/* shift the argument right by the required places */
				308	shr_Xsig(&argSqrd, 2 * (-1 - exponent));
				309	}
				310
				311	argTo4.msw = argSqrd.msw;
				312	argTo4.midw = argSqrd.midw;
				313	argTo4.lsw = argSqrd.lsw;
				314	mul_Xsig_Xsig(&argTo4, &argTo4);
				315
				316	polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_l,
				317	N_COEFF_N - 1);
				318	mul_Xsig_Xsig(&accumulator, &argSqrd);
				319	negate_Xsig(&accumulator);
				320
				321	polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_l,
				322	N_COEFF_P - 1);
				323
				324	shr_Xsig(&accumulator, 2); /* Divide by four */
				325	accumulator.msw \|= 0x80000000; /* Add 1.0 */
				326
				327	mul64_Xsig(&accumulator, &fixed_arg);
				328	mul64_Xsig(&accumulator, &fixed_arg);
				329	mul64_Xsig(&accumulator, &fixed_arg);
				330
				331	/* Divide by four, FPU_REG compatible, etc */
				332	exponent = 3 * exponent;
				333
				334	/* The minimum exponent difference is 3 */
				335	shr_Xsig(&accumulator, exp2 - exponent);
				336
				337	negate_Xsig(&accumulator);
				338	XSIG_LL(accumulator) += fixed_arg;
				339
				340	/* The basic computation is complete. Now fix the answer to
				341	compensate for the error due to the approximation used for
				342	pi/2
				343	*/
				344
				345	/* This has an exponent of -65 */
				346	XSIG_LL(fix_up) = 0x898cc51701b839a2ll;
				347	fix_up.lsw = 0;
				348
				349	/* The fix-up needs to be improved for larger args */
				350	if (argSqrd.msw & 0xffc00000) {
				351	/* Get about 32 bit precision in these: */
				352	fix_up.msw -= mul_32_32(0x898cc517, argSqrd.msw) / 2;
				353	fix_up.msw += mul_32_32(0x898cc517, argTo4.msw) / 24;
				354	}
				355
				356	exp2 += norm_Xsig(&accumulator);
				357	shr_Xsig(&accumulator, 1); /* Prevent overflow */
				358	exp2++;
				359	shr_Xsig(&fix_up, 65 + exp2);
				360
				361	add_Xsig_Xsig(&accumulator, &fix_up);
				362
				363	echange = round_Xsig(&accumulator);
				364
				365	setexponentpos(&result, exp2 + echange);
				366	significand(&result) = XSIG_LL(accumulator);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	367	}
				368
Ingo Molnar	3d0d14f	2008-01-30 13:30:11 +0100	[diff] [blame]	369	FPU_copy_to_reg0(&result, TAG_Valid);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	370
				371	#ifdef PARANOID
Ingo Molnar	3d0d14f	2008-01-30 13:30:11 +0100	[diff] [blame]	372	if ((exponent(&result) >= 0)
				373	&& (significand(&result) > 0x8000000000000000LL)) {
				374	EXCEPTION(EX_INTERNAL \| 0x151);
				375	}
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	376	#endif /* PARANOID */
				377
				378	}