Blame - arch/arm/vfp/vfpdouble.c - kernel/msm-4.19

blob: febd115dba28aa5a2314bc6727c3529aa1b6fc4c [file] [log] [blame]

Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1	/*
				2	* linux/arch/arm/vfp/vfpdouble.c
				3	*
				4	* This code is derived in part from John R. Housers softfloat library, which
				5	* carries the following notice:
				6	*
				7	* ===========================================================================
				8	* This C source file is part of the SoftFloat IEC/IEEE Floating-point
				9	* Arithmetic Package, Release 2.
				10	*
				11	* Written by John R. Hauser. This work was made possible in part by the
				12	* International Computer Science Institute, located at Suite 600, 1947 Center
				13	* Street, Berkeley, California 94704. Funding was partially provided by the
				14	* National Science Foundation under grant MIP-9311980. The original version
				15	* of this code was written as part of a project to build a fixed-point vector
				16	* processor in collaboration with the University of California at Berkeley,
				17	* overseen by Profs. Nelson Morgan and John Wawrzynek. More information
				18	* is available through the web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
				19	* arithmetic/softfloat.html'.
				20	*
				21	* THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort
				22	* has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
				23	* TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO
				24	* PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
				25	* AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
				26	*
				27	* Derivative works are acceptable, even for commercial purposes, so long as
				28	* (1) they include prominent notice that the work is derivative, and (2) they
				29	* include prominent notice akin to these three paragraphs for those parts of
				30	* this code that are retained.
				31	* ===========================================================================
				32	*/
				33	#include <linux/kernel.h>
				34	#include <linux/bitops.h>
Russell King	438a761	2005-06-29 23:01:02 +0100	[diff] [blame]	35
				36	#include <asm/div64.h>
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	37	#include <asm/ptrace.h>
				38	#include <asm/vfp.h>
				39
				40	#include "vfpinstr.h"
				41	#include "vfp.h"
				42
				43	static struct vfp_double vfp_double_default_qnan = {
				44	.exponent = 2047,
				45	.sign = 0,
				46	.significand = VFP_DOUBLE_SIGNIFICAND_QNAN,
				47	};
				48
				49	static void vfp_double_dump(const char str, struct vfp_double d)
				50	{
				51	pr_debug("VFP: %s: sign=%d exponent=%d significand=%016llx\n",
				52	str, d->sign != 0, d->exponent, d->significand);
				53	}
				54
				55	static void vfp_double_normalise_denormal(struct vfp_double *vd)
				56	{
				57	int bits = 31 - fls(vd->significand >> 32);
				58	if (bits == 31)
				59	bits = 62 - fls(vd->significand);
				60
				61	vfp_double_dump("normalise_denormal: in", vd);
				62
				63	if (bits) {
				64	vd->exponent -= bits - 1;
				65	vd->significand <<= bits;
				66	}
				67
				68	vfp_double_dump("normalise_denormal: out", vd);
				69	}
				70
				71	u32 vfp_double_normaliseround(int dd, struct vfp_double vd, u32 fpscr, u32 exceptions, const char func)
				72	{
				73	u64 significand, incr;
				74	int exponent, shift, underflow;
				75	u32 rmode;
				76
				77	vfp_double_dump("pack: in", vd);
				78
				79	/*
				80	* Infinities and NaNs are a special case.
				81	*/
				82	if (vd->exponent == 2047 && (vd->significand == 0 \|\| exceptions))
				83	goto pack;
				84
				85	/*
				86	* Special-case zero.
				87	*/
				88	if (vd->significand == 0) {
				89	vd->exponent = 0;
				90	goto pack;
				91	}
				92
				93	exponent = vd->exponent;
				94	significand = vd->significand;
				95
				96	shift = 32 - fls(significand >> 32);
				97	if (shift == 32)
				98	shift = 64 - fls(significand);
				99	if (shift) {
				100	exponent -= shift;
				101	significand <<= shift;
				102	}
				103
				104	#ifdef DEBUG
				105	vd->exponent = exponent;
				106	vd->significand = significand;
				107	vfp_double_dump("pack: normalised", vd);
				108	#endif
				109
				110	/*
				111	* Tiny number?
				112	*/
				113	underflow = exponent < 0;
				114	if (underflow) {
				115	significand = vfp_shiftright64jamming(significand, -exponent);
				116	exponent = 0;
				117	#ifdef DEBUG
				118	vd->exponent = exponent;
				119	vd->significand = significand;
				120	vfp_double_dump("pack: tiny number", vd);
				121	#endif
				122	if (!(significand & ((1ULL << (VFP_DOUBLE_LOW_BITS + 1)) - 1)))
				123	underflow = 0;
				124	}
				125
				126	/*
				127	* Select rounding increment.
				128	*/
				129	incr = 0;
				130	rmode = fpscr & FPSCR_RMODE_MASK;
				131
				132	if (rmode == FPSCR_ROUND_NEAREST) {
				133	incr = 1ULL << VFP_DOUBLE_LOW_BITS;
				134	if ((significand & (1ULL << (VFP_DOUBLE_LOW_BITS + 1))) == 0)
				135	incr -= 1;
				136	} else if (rmode == FPSCR_ROUND_TOZERO) {
				137	incr = 0;
				138	} else if ((rmode == FPSCR_ROUND_PLUSINF) ^ (vd->sign != 0))
				139	incr = (1ULL << (VFP_DOUBLE_LOW_BITS + 1)) - 1;
				140
				141	pr_debug("VFP: rounding increment = 0x%08llx\n", incr);
				142
				143	/*
				144	* Is our rounding going to overflow?
				145	*/
				146	if ((significand + incr) < significand) {
				147	exponent += 1;
				148	significand = (significand >> 1) \| (significand & 1);
				149	incr >>= 1;
				150	#ifdef DEBUG
				151	vd->exponent = exponent;
				152	vd->significand = significand;
				153	vfp_double_dump("pack: overflow", vd);
				154	#endif
				155	}
				156
				157	/*
				158	* If any of the low bits (which will be shifted out of the
				159	* number) are non-zero, the result is inexact.
				160	*/
				161	if (significand & ((1 << (VFP_DOUBLE_LOW_BITS + 1)) - 1))
				162	exceptions \|= FPSCR_IXC;
				163
				164	/*
				165	* Do our rounding.
				166	*/
				167	significand += incr;
				168
				169	/*
				170	* Infinity?
				171	*/
				172	if (exponent >= 2046) {
				173	exceptions \|= FPSCR_OFC \| FPSCR_IXC;
				174	if (incr == 0) {
				175	vd->exponent = 2045;
				176	vd->significand = 0x7fffffffffffffffULL;
				177	} else {
				178	vd->exponent = 2047; /* infinity */
				179	vd->significand = 0;
				180	}
				181	} else {
				182	if (significand >> (VFP_DOUBLE_LOW_BITS + 1) == 0)
				183	exponent = 0;
				184	if (exponent \|\| significand > 0x8000000000000000ULL)
				185	underflow = 0;
				186	if (underflow)
				187	exceptions \|= FPSCR_UFC;
				188	vd->exponent = exponent;
				189	vd->significand = significand >> 1;
				190	}
				191
				192	pack:
				193	vfp_double_dump("pack: final", vd);
				194	{
				195	s64 d = vfp_double_pack(vd);
				196	pr_debug("VFP: %s: d(d%d)=%016llx exceptions=%08x\n", func,
				197	dd, d, exceptions);
				198	vfp_put_double(dd, d);
				199	}
				200	return exceptions & ~VFP_NAN_FLAG;
				201	}
				202
				203	/*
				204	* Propagate the NaN, setting exceptions if it is signalling.
				205	* 'n' is always a NaN. 'm' may be a number, NaN or infinity.
				206	*/
				207	static u32
				208	vfp_propagate_nan(struct vfp_double vdd, struct vfp_double vdn,
				209	struct vfp_double *vdm, u32 fpscr)
				210	{
				211	struct vfp_double *nan;
				212	int tn, tm = 0;
				213
				214	tn = vfp_double_type(vdn);
				215
				216	if (vdm)
				217	tm = vfp_double_type(vdm);
				218
				219	if (fpscr & FPSCR_DEFAULT_NAN)
				220	/*
				221	* Default NaN mode - always returns a quiet NaN
				222	*/
				223	nan = &vfp_double_default_qnan;
				224	else {
				225	/*
				226	* Contemporary mode - select the first signalling
				227	* NAN, or if neither are signalling, the first
				228	* quiet NAN.
				229	*/
				230	if (tn == VFP_SNAN \|\| (tm != VFP_SNAN && tn == VFP_QNAN))
				231	nan = vdn;
				232	else
				233	nan = vdm;
				234	/*
				235	* Make the NaN quiet.
				236	*/
				237	nan->significand \|= VFP_DOUBLE_SIGNIFICAND_QNAN;
				238	}
				239
				240	vdd = nan;
				241
				242	/*
				243	* If one was a signalling NAN, raise invalid operation.
				244	*/
				245	return tn == VFP_SNAN \|\| tm == VFP_SNAN ? FPSCR_IOC : VFP_NAN_FLAG;
				246	}
				247
				248	/*
				249	* Extended operations
				250	*/
				251	static u32 vfp_double_fabs(int dd, int unused, int dm, u32 fpscr)
				252	{
				253	vfp_put_double(dd, vfp_double_packed_abs(vfp_get_double(dm)));
				254	return 0;
				255	}
				256
				257	static u32 vfp_double_fcpy(int dd, int unused, int dm, u32 fpscr)
				258	{
				259	vfp_put_double(dd, vfp_get_double(dm));
				260	return 0;
				261	}
				262
				263	static u32 vfp_double_fneg(int dd, int unused, int dm, u32 fpscr)
				264	{
				265	vfp_put_double(dd, vfp_double_packed_negate(vfp_get_double(dm)));
				266	return 0;
				267	}
				268
				269	static u32 vfp_double_fsqrt(int dd, int unused, int dm, u32 fpscr)
				270	{
				271	struct vfp_double vdm, vdd;
				272	int ret, tm;
				273
				274	vfp_double_unpack(&vdm, vfp_get_double(dm));
				275	tm = vfp_double_type(&vdm);
				276	if (tm & (VFP_NAN\|VFP_INFINITY)) {
				277	struct vfp_double *vdp = &vdd;
				278
				279	if (tm & VFP_NAN)
				280	ret = vfp_propagate_nan(vdp, &vdm, NULL, fpscr);
				281	else if (vdm.sign == 0) {
				282	sqrt_copy:
				283	vdp = &vdm;
				284	ret = 0;
				285	} else {
				286	sqrt_invalid:
				287	vdp = &vfp_double_default_qnan;
				288	ret = FPSCR_IOC;
				289	}
				290	vfp_put_double(dd, vfp_double_pack(vdp));
				291	return ret;
				292	}
				293
				294	/*
				295	* sqrt(+/- 0) == +/- 0
				296	*/
				297	if (tm & VFP_ZERO)
				298	goto sqrt_copy;
				299
				300	/*
				301	* Normalise a denormalised number
				302	*/
				303	if (tm & VFP_DENORMAL)
				304	vfp_double_normalise_denormal(&vdm);
				305
				306	/*
				307	* sqrt(<0) = invalid
				308	*/
				309	if (vdm.sign)
				310	goto sqrt_invalid;
				311
				312	vfp_double_dump("sqrt", &vdm);
				313
				314	/*
				315	* Estimate the square root.
				316	*/
				317	vdd.sign = 0;
				318	vdd.exponent = ((vdm.exponent - 1023) >> 1) + 1023;
				319	vdd.significand = (u64)vfp_estimate_sqrt_significand(vdm.exponent, vdm.significand >> 32) << 31;
				320
				321	vfp_double_dump("sqrt estimate1", &vdd);
				322
				323	vdm.significand >>= 1 + (vdm.exponent & 1);
				324	vdd.significand += 2 + vfp_estimate_div128to64(vdm.significand, 0, vdd.significand);
				325
				326	vfp_double_dump("sqrt estimate2", &vdd);
				327
				328	/*
				329	* And now adjust.
				330	*/
				331	if ((vdd.significand & VFP_DOUBLE_LOW_BITS_MASK) <= 5) {
				332	if (vdd.significand < 2) {
				333	vdd.significand = ~0ULL;
				334	} else {
				335	u64 termh, terml, remh, reml;
				336	vdm.significand <<= 2;
				337	mul64to128(&termh, &terml, vdd.significand, vdd.significand);
				338	sub128(&remh, &reml, vdm.significand, 0, termh, terml);
				339	while ((s64)remh < 0) {
				340	vdd.significand -= 1;
				341	shift64left(&termh, &terml, vdd.significand);
				342	terml \|= 1;
				343	add128(&remh, &reml, remh, reml, termh, terml);
				344	}
				345	vdd.significand \|= (remh \| reml) != 0;
				346	}
				347	}
				348	vdd.significand = vfp_shiftright64jamming(vdd.significand, 1);
				349
				350	return vfp_double_normaliseround(dd, &vdd, fpscr, 0, "fsqrt");
				351	}
				352
				353	/*
				354	* Equal := ZC
				355	* Less than := N
				356	* Greater than := C
				357	* Unordered := CV
				358	*/
				359	static u32 vfp_compare(int dd, int signal_on_qnan, int dm, u32 fpscr)
				360	{
				361	s64 d, m;
				362	u32 ret = 0;
				363
				364	m = vfp_get_double(dm);
				365	if (vfp_double_packed_exponent(m) == 2047 && vfp_double_packed_mantissa(m)) {
				366	ret \|= FPSCR_C \| FPSCR_V;
				367	if (signal_on_qnan \|\| !(vfp_double_packed_mantissa(m) & (1ULL << (VFP_DOUBLE_MANTISSA_BITS - 1))))
				368	/*
				369	* Signalling NaN, or signalling on quiet NaN
				370	*/
				371	ret \|= FPSCR_IOC;
				372	}
				373
				374	d = vfp_get_double(dd);
				375	if (vfp_double_packed_exponent(d) == 2047 && vfp_double_packed_mantissa(d)) {
				376	ret \|= FPSCR_C \| FPSCR_V;
				377	if (signal_on_qnan \|\| !(vfp_double_packed_mantissa(d) & (1ULL << (VFP_DOUBLE_MANTISSA_BITS - 1))))
				378	/*
				379	* Signalling NaN, or signalling on quiet NaN
				380	*/
				381	ret \|= FPSCR_IOC;
				382	}
				383
				384	if (ret == 0) {
				385	if (d == m \|\| vfp_double_packed_abs(d \| m) == 0) {
				386	/*
				387	* equal
				388	*/
				389	ret \|= FPSCR_Z \| FPSCR_C;
				390	} else if (vfp_double_packed_sign(d ^ m)) {
				391	/*
				392	* different signs
				393	*/
				394	if (vfp_double_packed_sign(d))
				395	/*
				396	* d is negative, so d < m
				397	*/
				398	ret \|= FPSCR_N;
				399	else
				400	/*
				401	* d is positive, so d > m
				402	*/
				403	ret \|= FPSCR_C;
				404	} else if ((vfp_double_packed_sign(d) != 0) ^ (d < m)) {
				405	/*
				406	* d < m
				407	*/
				408	ret \|= FPSCR_N;
				409	} else if ((vfp_double_packed_sign(d) != 0) ^ (d > m)) {
				410	/*
				411	* d > m
				412	*/
				413	ret \|= FPSCR_C;
				414	}
				415	}
				416
				417	return ret;
				418	}
				419
				420	static u32 vfp_double_fcmp(int dd, int unused, int dm, u32 fpscr)
				421	{
				422	return vfp_compare(dd, 0, dm, fpscr);
				423	}
				424
				425	static u32 vfp_double_fcmpe(int dd, int unused, int dm, u32 fpscr)
				426	{
				427	return vfp_compare(dd, 1, dm, fpscr);
				428	}
				429
				430	static u32 vfp_double_fcmpz(int dd, int unused, int dm, u32 fpscr)
				431	{
				432	return vfp_compare(dd, 0, VFP_REG_ZERO, fpscr);
				433	}
				434
				435	static u32 vfp_double_fcmpez(int dd, int unused, int dm, u32 fpscr)
				436	{
				437	return vfp_compare(dd, 1, VFP_REG_ZERO, fpscr);
				438	}
				439
				440	static u32 vfp_double_fcvts(int sd, int unused, int dm, u32 fpscr)
				441	{
				442	struct vfp_double vdm;
				443	struct vfp_single vsd;
				444	int tm;
				445	u32 exceptions = 0;
				446
				447	vfp_double_unpack(&vdm, vfp_get_double(dm));
				448
				449	tm = vfp_double_type(&vdm);
				450
				451	/*
				452	* If we have a signalling NaN, signal invalid operation.
				453	*/
				454	if (tm == VFP_SNAN)
				455	exceptions = FPSCR_IOC;
				456
				457	if (tm & VFP_DENORMAL)
				458	vfp_double_normalise_denormal(&vdm);
				459
				460	vsd.sign = vdm.sign;
				461	vsd.significand = vfp_hi64to32jamming(vdm.significand);
				462
				463	/*
				464	* If we have an infinity or a NaN, the exponent must be 255
				465	*/
				466	if (tm & (VFP_INFINITY\|VFP_NAN)) {
				467	vsd.exponent = 255;
				468	if (tm & VFP_NAN)
				469	vsd.significand \|= VFP_SINGLE_SIGNIFICAND_QNAN;
				470	goto pack_nan;
				471	} else if (tm & VFP_ZERO)
				472	vsd.exponent = 0;
				473	else
				474	vsd.exponent = vdm.exponent - (1023 - 127);
				475
				476	return vfp_single_normaliseround(sd, &vsd, fpscr, exceptions, "fcvts");
				477
				478	pack_nan:
				479	vfp_put_float(sd, vfp_single_pack(&vsd));
				480	return exceptions;
				481	}
				482
				483	static u32 vfp_double_fuito(int dd, int unused, int dm, u32 fpscr)
				484	{
				485	struct vfp_double vdm;
				486	u32 m = vfp_get_float(dm);
				487
				488	vdm.sign = 0;
				489	vdm.exponent = 1023 + 63 - 1;
				490	vdm.significand = (u64)m;
				491
				492	return vfp_double_normaliseround(dd, &vdm, fpscr, 0, "fuito");
				493	}
				494
				495	static u32 vfp_double_fsito(int dd, int unused, int dm, u32 fpscr)
				496	{
				497	struct vfp_double vdm;
				498	u32 m = vfp_get_float(dm);
				499
				500	vdm.sign = (m & 0x80000000) >> 16;
				501	vdm.exponent = 1023 + 63 - 1;
				502	vdm.significand = vdm.sign ? -m : m;
				503
				504	return vfp_double_normaliseround(dd, &vdm, fpscr, 0, "fsito");
				505	}
				506
				507	static u32 vfp_double_ftoui(int sd, int unused, int dm, u32 fpscr)
				508	{
				509	struct vfp_double vdm;
				510	u32 d, exceptions = 0;
				511	int rmode = fpscr & FPSCR_RMODE_MASK;
				512	int tm;
				513
				514	vfp_double_unpack(&vdm, vfp_get_double(dm));
				515
				516	/*
				517	* Do we have a denormalised number?
				518	*/
				519	tm = vfp_double_type(&vdm);
				520	if (tm & VFP_DENORMAL)
				521	exceptions \|= FPSCR_IDC;
				522
				523	if (tm & VFP_NAN)
				524	vdm.sign = 0;
				525
				526	if (vdm.exponent >= 1023 + 32) {
				527	d = vdm.sign ? 0 : 0xffffffff;
				528	exceptions = FPSCR_IOC;
				529	} else if (vdm.exponent >= 1023 - 1) {
				530	int shift = 1023 + 63 - vdm.exponent;
				531	u64 rem, incr = 0;
				532
				533	/*
				534	* 2^0 <= m < 2^32-2^8
				535	*/
				536	d = (vdm.significand << 1) >> shift;
				537	rem = vdm.significand << (65 - shift);
				538
				539	if (rmode == FPSCR_ROUND_NEAREST) {
				540	incr = 0x8000000000000000ULL;
				541	if ((d & 1) == 0)
				542	incr -= 1;
				543	} else if (rmode == FPSCR_ROUND_TOZERO) {
				544	incr = 0;
				545	} else if ((rmode == FPSCR_ROUND_PLUSINF) ^ (vdm.sign != 0)) {
				546	incr = ~0ULL;
				547	}
				548
				549	if ((rem + incr) < rem) {
				550	if (d < 0xffffffff)
				551	d += 1;
				552	else
				553	exceptions \|= FPSCR_IOC;
				554	}
				555
				556	if (d && vdm.sign) {
				557	d = 0;
				558	exceptions \|= FPSCR_IOC;
				559	} else if (rem)
				560	exceptions \|= FPSCR_IXC;
				561	} else {
				562	d = 0;
				563	if (vdm.exponent \| vdm.significand) {
				564	exceptions \|= FPSCR_IXC;
				565	if (rmode == FPSCR_ROUND_PLUSINF && vdm.sign == 0)
				566	d = 1;
				567	else if (rmode == FPSCR_ROUND_MINUSINF && vdm.sign) {
				568	d = 0;
				569	exceptions \|= FPSCR_IOC;
				570	}
				571	}
				572	}
				573
				574	pr_debug("VFP: ftoui: d(s%d)=%08x exceptions=%08x\n", sd, d, exceptions);
				575
				576	vfp_put_float(sd, d);
				577
				578	return exceptions;
				579	}
				580
				581	static u32 vfp_double_ftouiz(int sd, int unused, int dm, u32 fpscr)
				582	{
				583	return vfp_double_ftoui(sd, unused, dm, FPSCR_ROUND_TOZERO);
				584	}
				585
				586	static u32 vfp_double_ftosi(int sd, int unused, int dm, u32 fpscr)
				587	{
				588	struct vfp_double vdm;
				589	u32 d, exceptions = 0;
				590	int rmode = fpscr & FPSCR_RMODE_MASK;
Catalin Marinas	1320a80	2006-04-10 21:32:39 +0100	[diff] [blame]	591	int tm;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	592
				593	vfp_double_unpack(&vdm, vfp_get_double(dm));
				594	vfp_double_dump("VDM", &vdm);
				595
				596	/*
				597	* Do we have denormalised number?
				598	*/
Catalin Marinas	1320a80	2006-04-10 21:32:39 +0100	[diff] [blame]	599	tm = vfp_double_type(&vdm);
				600	if (tm & VFP_DENORMAL)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	601	exceptions \|= FPSCR_IDC;
				602
Catalin Marinas	1320a80	2006-04-10 21:32:39 +0100	[diff] [blame]	603	if (tm & VFP_NAN) {
				604	d = 0;
				605	exceptions \|= FPSCR_IOC;
				606	} else if (vdm.exponent >= 1023 + 32) {
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	607	d = 0x7fffffff;
				608	if (vdm.sign)
				609	d = ~d;
				610	exceptions \|= FPSCR_IOC;
				611	} else if (vdm.exponent >= 1023 - 1) {
				612	int shift = 1023 + 63 - vdm.exponent; /* 58 */
				613	u64 rem, incr = 0;
				614
				615	d = (vdm.significand << 1) >> shift;
				616	rem = vdm.significand << (65 - shift);
				617
				618	if (rmode == FPSCR_ROUND_NEAREST) {
				619	incr = 0x8000000000000000ULL;
				620	if ((d & 1) == 0)
				621	incr -= 1;
				622	} else if (rmode == FPSCR_ROUND_TOZERO) {
				623	incr = 0;
				624	} else if ((rmode == FPSCR_ROUND_PLUSINF) ^ (vdm.sign != 0)) {
				625	incr = ~0ULL;
				626	}
				627
				628	if ((rem + incr) < rem && d < 0xffffffff)
				629	d += 1;
				630	if (d > 0x7fffffff + (vdm.sign != 0)) {
				631	d = 0x7fffffff + (vdm.sign != 0);
				632	exceptions \|= FPSCR_IOC;
				633	} else if (rem)
				634	exceptions \|= FPSCR_IXC;
				635
				636	if (vdm.sign)
				637	d = -d;
				638	} else {
				639	d = 0;
				640	if (vdm.exponent \| vdm.significand) {
				641	exceptions \|= FPSCR_IXC;
				642	if (rmode == FPSCR_ROUND_PLUSINF && vdm.sign == 0)
				643	d = 1;
				644	else if (rmode == FPSCR_ROUND_MINUSINF && vdm.sign)
				645	d = -1;
				646	}
				647	}
				648
				649	pr_debug("VFP: ftosi: d(s%d)=%08x exceptions=%08x\n", sd, d, exceptions);
				650
				651	vfp_put_float(sd, (s32)d);
				652
				653	return exceptions;
				654	}
				655
				656	static u32 vfp_double_ftosiz(int dd, int unused, int dm, u32 fpscr)
				657	{
				658	return vfp_double_ftosi(dd, unused, dm, FPSCR_ROUND_TOZERO);
				659	}
				660
				661
				662	static u32 (* const fop_extfns[32])(int dd, int unused, int dm, u32 fpscr) = {
				663	[FEXT_TO_IDX(FEXT_FCPY)] = vfp_double_fcpy,
				664	[FEXT_TO_IDX(FEXT_FABS)] = vfp_double_fabs,
				665	[FEXT_TO_IDX(FEXT_FNEG)] = vfp_double_fneg,
				666	[FEXT_TO_IDX(FEXT_FSQRT)] = vfp_double_fsqrt,
				667	[FEXT_TO_IDX(FEXT_FCMP)] = vfp_double_fcmp,
				668	[FEXT_TO_IDX(FEXT_FCMPE)] = vfp_double_fcmpe,
				669	[FEXT_TO_IDX(FEXT_FCMPZ)] = vfp_double_fcmpz,
				670	[FEXT_TO_IDX(FEXT_FCMPEZ)] = vfp_double_fcmpez,
				671	[FEXT_TO_IDX(FEXT_FCVT)] = vfp_double_fcvts,
				672	[FEXT_TO_IDX(FEXT_FUITO)] = vfp_double_fuito,
				673	[FEXT_TO_IDX(FEXT_FSITO)] = vfp_double_fsito,
				674	[FEXT_TO_IDX(FEXT_FTOUI)] = vfp_double_ftoui,
				675	[FEXT_TO_IDX(FEXT_FTOUIZ)] = vfp_double_ftouiz,
				676	[FEXT_TO_IDX(FEXT_FTOSI)] = vfp_double_ftosi,
				677	[FEXT_TO_IDX(FEXT_FTOSIZ)] = vfp_double_ftosiz,
				678	};
				679
				680
				681
				682
				683	static u32
				684	vfp_double_fadd_nonnumber(struct vfp_double vdd, struct vfp_double vdn,
				685	struct vfp_double *vdm, u32 fpscr)
				686	{
				687	struct vfp_double *vdp;
				688	u32 exceptions = 0;
				689	int tn, tm;
				690
				691	tn = vfp_double_type(vdn);
				692	tm = vfp_double_type(vdm);
				693
				694	if (tn & tm & VFP_INFINITY) {
				695	/*
				696	* Two infinities. Are they different signs?
				697	*/
				698	if (vdn->sign ^ vdm->sign) {
				699	/*
				700	* different signs -> invalid
				701	*/
				702	exceptions = FPSCR_IOC;
				703	vdp = &vfp_double_default_qnan;
				704	} else {
				705	/*
				706	* same signs -> valid
				707	*/
				708	vdp = vdn;
				709	}
				710	} else if (tn & VFP_INFINITY && tm & VFP_NUMBER) {
				711	/*
				712	* One infinity and one number -> infinity
				713	*/
				714	vdp = vdn;
				715	} else {
				716	/*
				717	* 'n' is a NaN of some type
				718	*/
				719	return vfp_propagate_nan(vdd, vdn, vdm, fpscr);
				720	}
				721	vdd = vdp;
				722	return exceptions;
				723	}
				724
				725	static u32
				726	vfp_double_add(struct vfp_double vdd, struct vfp_double vdn,
				727	struct vfp_double *vdm, u32 fpscr)
				728	{
				729	u32 exp_diff;
				730	u64 m_sig;
				731
				732	if (vdn->significand & (1ULL << 63) \|\|
				733	vdm->significand & (1ULL << 63)) {
				734	pr_info("VFP: bad FP values in %s\n", __func__);
				735	vfp_double_dump("VDN", vdn);
				736	vfp_double_dump("VDM", vdm);
				737	}
				738
				739	/*
				740	* Ensure that 'n' is the largest magnitude number. Note that
				741	* if 'n' and 'm' have equal exponents, we do not swap them.
				742	* This ensures that NaN propagation works correctly.
				743	*/
				744	if (vdn->exponent < vdm->exponent) {
				745	struct vfp_double *t = vdn;
				746	vdn = vdm;
				747	vdm = t;
				748	}
				749
				750	/*
				751	* Is 'n' an infinity or a NaN? Note that 'm' may be a number,
				752	* infinity or a NaN here.
				753	*/
				754	if (vdn->exponent == 2047)
				755	return vfp_double_fadd_nonnumber(vdd, vdn, vdm, fpscr);
				756
				757	/*
				758	* We have two proper numbers, where 'vdn' is the larger magnitude.
				759	*
				760	* Copy 'n' to 'd' before doing the arithmetic.
				761	*/
				762	vdd = vdn;
				763
				764	/*
				765	* Align 'm' with the result.
				766	*/
				767	exp_diff = vdn->exponent - vdm->exponent;
				768	m_sig = vfp_shiftright64jamming(vdm->significand, exp_diff);
				769
				770	/*
				771	* If the signs are different, we are really subtracting.
				772	*/
				773	if (vdn->sign ^ vdm->sign) {
				774	m_sig = vdn->significand - m_sig;
				775	if ((s64)m_sig < 0) {
				776	vdd->sign = vfp_sign_negate(vdd->sign);
				777	m_sig = -m_sig;
Catalin Marinas	7b1fbf2	2005-08-03 19:53:25 +0100	[diff] [blame]	778	} else if (m_sig == 0) {
				779	vdd->sign = (fpscr & FPSCR_RMODE_MASK) ==
				780	FPSCR_ROUND_MINUSINF ? 0x8000 : 0;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	781	}
				782	} else {
				783	m_sig += vdn->significand;
				784	}
				785	vdd->significand = m_sig;
				786
				787	return 0;
				788	}
				789
				790	static u32
				791	vfp_double_multiply(struct vfp_double vdd, struct vfp_double vdn,
				792	struct vfp_double *vdm, u32 fpscr)
				793	{
				794	vfp_double_dump("VDN", vdn);
				795	vfp_double_dump("VDM", vdm);
				796
				797	/*
				798	* Ensure that 'n' is the largest magnitude number. Note that
				799	* if 'n' and 'm' have equal exponents, we do not swap them.
				800	* This ensures that NaN propagation works correctly.
				801	*/
				802	if (vdn->exponent < vdm->exponent) {
				803	struct vfp_double *t = vdn;
				804	vdn = vdm;
				805	vdm = t;
				806	pr_debug("VFP: swapping M <-> N\n");
				807	}
				808
				809	vdd->sign = vdn->sign ^ vdm->sign;
				810
				811	/*
				812	* If 'n' is an infinity or NaN, handle it. 'm' may be anything.
				813	*/
				814	if (vdn->exponent == 2047) {
				815	if (vdn->significand \|\| (vdm->exponent == 2047 && vdm->significand))
				816	return vfp_propagate_nan(vdd, vdn, vdm, fpscr);
				817	if ((vdm->exponent \| vdm->significand) == 0) {
				818	*vdd = vfp_double_default_qnan;
				819	return FPSCR_IOC;
				820	}
				821	vdd->exponent = vdn->exponent;
				822	vdd->significand = 0;
				823	return 0;
				824	}
				825
				826	/*
				827	* If 'm' is zero, the result is always zero. In this case,
				828	* 'n' may be zero or a number, but it doesn't matter which.
				829	*/
				830	if ((vdm->exponent \| vdm->significand) == 0) {
				831	vdd->exponent = 0;
				832	vdd->significand = 0;
				833	return 0;
				834	}
				835
				836	/*
				837	* We add 2 to the destination exponent for the same reason
				838	* as the addition case - though this time we have +1 from
				839	* each input operand.
				840	*/
				841	vdd->exponent = vdn->exponent + vdm->exponent - 1023 + 2;
				842	vdd->significand = vfp_hi64multiply64(vdn->significand, vdm->significand);
				843
				844	vfp_double_dump("VDD", vdd);
				845	return 0;
				846	}
				847
				848	#define NEG_MULTIPLY (1 << 0)
				849	#define NEG_SUBTRACT (1 << 1)
				850
				851	static u32
				852	vfp_double_multiply_accumulate(int dd, int dn, int dm, u32 fpscr, u32 negate, char *func)
				853	{
				854	struct vfp_double vdd, vdp, vdn, vdm;
				855	u32 exceptions;
				856
				857	vfp_double_unpack(&vdn, vfp_get_double(dn));
				858	if (vdn.exponent == 0 && vdn.significand)
				859	vfp_double_normalise_denormal(&vdn);
				860
				861	vfp_double_unpack(&vdm, vfp_get_double(dm));
				862	if (vdm.exponent == 0 && vdm.significand)
				863	vfp_double_normalise_denormal(&vdm);
				864
				865	exceptions = vfp_double_multiply(&vdp, &vdn, &vdm, fpscr);
				866	if (negate & NEG_MULTIPLY)
				867	vdp.sign = vfp_sign_negate(vdp.sign);
				868
				869	vfp_double_unpack(&vdn, vfp_get_double(dd));
				870	if (negate & NEG_SUBTRACT)
				871	vdn.sign = vfp_sign_negate(vdn.sign);
				872
				873	exceptions \|= vfp_double_add(&vdd, &vdn, &vdp, fpscr);
				874
				875	return vfp_double_normaliseround(dd, &vdd, fpscr, exceptions, func);
				876	}
				877
				878	/*
				879	* Standard operations
				880	*/
				881
				882	/*
				883	* sd = sd + (sn * sm)
				884	*/
				885	static u32 vfp_double_fmac(int dd, int dn, int dm, u32 fpscr)
				886	{
				887	return vfp_double_multiply_accumulate(dd, dn, dm, fpscr, 0, "fmac");
				888	}
				889
				890	/*
				891	* sd = sd - (sn * sm)
				892	*/
				893	static u32 vfp_double_fnmac(int dd, int dn, int dm, u32 fpscr)
				894	{
				895	return vfp_double_multiply_accumulate(dd, dn, dm, fpscr, NEG_MULTIPLY, "fnmac");
				896	}
				897
				898	/*
				899	* sd = -sd + (sn * sm)
				900	*/
				901	static u32 vfp_double_fmsc(int dd, int dn, int dm, u32 fpscr)
				902	{
				903	return vfp_double_multiply_accumulate(dd, dn, dm, fpscr, NEG_SUBTRACT, "fmsc");
				904	}
				905
				906	/*
				907	* sd = -sd - (sn * sm)
				908	*/
				909	static u32 vfp_double_fnmsc(int dd, int dn, int dm, u32 fpscr)
				910	{
				911	return vfp_double_multiply_accumulate(dd, dn, dm, fpscr, NEG_SUBTRACT \| NEG_MULTIPLY, "fnmsc");
				912	}
				913
				914	/*
				915	* sd = sn * sm
				916	*/
				917	static u32 vfp_double_fmul(int dd, int dn, int dm, u32 fpscr)
				918	{
				919	struct vfp_double vdd, vdn, vdm;
				920	u32 exceptions;
				921
				922	vfp_double_unpack(&vdn, vfp_get_double(dn));
				923	if (vdn.exponent == 0 && vdn.significand)
				924	vfp_double_normalise_denormal(&vdn);
				925
				926	vfp_double_unpack(&vdm, vfp_get_double(dm));
				927	if (vdm.exponent == 0 && vdm.significand)
				928	vfp_double_normalise_denormal(&vdm);
				929
				930	exceptions = vfp_double_multiply(&vdd, &vdn, &vdm, fpscr);
				931	return vfp_double_normaliseround(dd, &vdd, fpscr, exceptions, "fmul");
				932	}
				933
				934	/*
				935	* sd = -(sn * sm)
				936	*/
				937	static u32 vfp_double_fnmul(int dd, int dn, int dm, u32 fpscr)
				938	{
				939	struct vfp_double vdd, vdn, vdm;
				940	u32 exceptions;
				941
				942	vfp_double_unpack(&vdn, vfp_get_double(dn));
				943	if (vdn.exponent == 0 && vdn.significand)
				944	vfp_double_normalise_denormal(&vdn);
				945
				946	vfp_double_unpack(&vdm, vfp_get_double(dm));
				947	if (vdm.exponent == 0 && vdm.significand)
				948	vfp_double_normalise_denormal(&vdm);
				949
				950	exceptions = vfp_double_multiply(&vdd, &vdn, &vdm, fpscr);
				951	vdd.sign = vfp_sign_negate(vdd.sign);
				952
				953	return vfp_double_normaliseround(dd, &vdd, fpscr, exceptions, "fnmul");
				954	}
				955
				956	/*
				957	* sd = sn + sm
				958	*/
				959	static u32 vfp_double_fadd(int dd, int dn, int dm, u32 fpscr)
				960	{
				961	struct vfp_double vdd, vdn, vdm;
				962	u32 exceptions;
				963
				964	vfp_double_unpack(&vdn, vfp_get_double(dn));
				965	if (vdn.exponent == 0 && vdn.significand)
				966	vfp_double_normalise_denormal(&vdn);
				967
				968	vfp_double_unpack(&vdm, vfp_get_double(dm));
				969	if (vdm.exponent == 0 && vdm.significand)
				970	vfp_double_normalise_denormal(&vdm);
				971
				972	exceptions = vfp_double_add(&vdd, &vdn, &vdm, fpscr);
				973
				974	return vfp_double_normaliseround(dd, &vdd, fpscr, exceptions, "fadd");
				975	}
				976
				977	/*
				978	* sd = sn - sm
				979	*/
				980	static u32 vfp_double_fsub(int dd, int dn, int dm, u32 fpscr)
				981	{
				982	struct vfp_double vdd, vdn, vdm;
				983	u32 exceptions;
				984
				985	vfp_double_unpack(&vdn, vfp_get_double(dn));
				986	if (vdn.exponent == 0 && vdn.significand)
				987	vfp_double_normalise_denormal(&vdn);
				988
				989	vfp_double_unpack(&vdm, vfp_get_double(dm));
				990	if (vdm.exponent == 0 && vdm.significand)
				991	vfp_double_normalise_denormal(&vdm);
				992
				993	/*
				994	* Subtraction is like addition, but with a negated operand.
				995	*/
				996	vdm.sign = vfp_sign_negate(vdm.sign);
				997
				998	exceptions = vfp_double_add(&vdd, &vdn, &vdm, fpscr);
				999
				1000	return vfp_double_normaliseround(dd, &vdd, fpscr, exceptions, "fsub");
				1001	}
				1002
				1003	/*
				1004	* sd = sn / sm
				1005	*/
				1006	static u32 vfp_double_fdiv(int dd, int dn, int dm, u32 fpscr)
				1007	{
				1008	struct vfp_double vdd, vdn, vdm;
				1009	u32 exceptions = 0;
				1010	int tm, tn;
				1011
				1012	vfp_double_unpack(&vdn, vfp_get_double(dn));
				1013	vfp_double_unpack(&vdm, vfp_get_double(dm));
				1014
				1015	vdd.sign = vdn.sign ^ vdm.sign;
				1016
				1017	tn = vfp_double_type(&vdn);
				1018	tm = vfp_double_type(&vdm);
				1019
				1020	/*
				1021	* Is n a NAN?
				1022	*/
				1023	if (tn & VFP_NAN)
				1024	goto vdn_nan;
				1025
				1026	/*
				1027	* Is m a NAN?
				1028	*/
				1029	if (tm & VFP_NAN)
				1030	goto vdm_nan;
				1031
				1032	/*
				1033	* If n and m are infinity, the result is invalid
				1034	* If n and m are zero, the result is invalid
				1035	*/
				1036	if (tm & tn & (VFP_INFINITY\|VFP_ZERO))
				1037	goto invalid;
				1038
				1039	/*
				1040	* If n is infinity, the result is infinity
				1041	*/
				1042	if (tn & VFP_INFINITY)
				1043	goto infinity;
				1044
				1045	/*
				1046	* If m is zero, raise div0 exceptions
				1047	*/
				1048	if (tm & VFP_ZERO)
				1049	goto divzero;
				1050
				1051	/*
				1052	* If m is infinity, or n is zero, the result is zero
				1053	*/
				1054	if (tm & VFP_INFINITY \|\| tn & VFP_ZERO)
				1055	goto zero;
				1056
				1057	if (tn & VFP_DENORMAL)
				1058	vfp_double_normalise_denormal(&vdn);
				1059	if (tm & VFP_DENORMAL)
				1060	vfp_double_normalise_denormal(&vdm);
				1061
				1062	/*
				1063	* Ok, we have two numbers, we can perform division.
				1064	*/
				1065	vdd.exponent = vdn.exponent - vdm.exponent + 1023 - 1;
				1066	vdm.significand <<= 1;
				1067	if (vdm.significand <= (2 * vdn.significand)) {
				1068	vdn.significand >>= 1;
				1069	vdd.exponent++;
				1070	}
				1071	vdd.significand = vfp_estimate_div128to64(vdn.significand, 0, vdm.significand);
				1072	if ((vdd.significand & 0x1ff) <= 2) {
				1073	u64 termh, terml, remh, reml;
				1074	mul64to128(&termh, &terml, vdm.significand, vdd.significand);
				1075	sub128(&remh, &reml, vdn.significand, 0, termh, terml);
				1076	while ((s64)remh < 0) {
				1077	vdd.significand -= 1;
				1078	add128(&remh, &reml, remh, reml, 0, vdm.significand);
				1079	}
				1080	vdd.significand \|= (reml != 0);
				1081	}
				1082	return vfp_double_normaliseround(dd, &vdd, fpscr, 0, "fdiv");
				1083
				1084	vdn_nan:
				1085	exceptions = vfp_propagate_nan(&vdd, &vdn, &vdm, fpscr);
				1086	pack:
				1087	vfp_put_double(dd, vfp_double_pack(&vdd));
				1088	return exceptions;
				1089
				1090	vdm_nan:
				1091	exceptions = vfp_propagate_nan(&vdd, &vdm, &vdn, fpscr);
				1092	goto pack;
				1093
				1094	zero:
				1095	vdd.exponent = 0;
				1096	vdd.significand = 0;
				1097	goto pack;
				1098
				1099	divzero:
				1100	exceptions = FPSCR_DZC;
				1101	infinity:
				1102	vdd.exponent = 2047;
				1103	vdd.significand = 0;
				1104	goto pack;
				1105
				1106	invalid:
				1107	vfp_put_double(dd, vfp_double_pack(&vfp_double_default_qnan));
				1108	return FPSCR_IOC;
				1109	}
				1110
				1111	static u32 (* const fop_fns[16])(int dd, int dn, int dm, u32 fpscr) = {
				1112	[FOP_TO_IDX(FOP_FMAC)] = vfp_double_fmac,
				1113	[FOP_TO_IDX(FOP_FNMAC)] = vfp_double_fnmac,
				1114	[FOP_TO_IDX(FOP_FMSC)] = vfp_double_fmsc,
				1115	[FOP_TO_IDX(FOP_FNMSC)] = vfp_double_fnmsc,
				1116	[FOP_TO_IDX(FOP_FMUL)] = vfp_double_fmul,
				1117	[FOP_TO_IDX(FOP_FNMUL)] = vfp_double_fnmul,
				1118	[FOP_TO_IDX(FOP_FADD)] = vfp_double_fadd,
				1119	[FOP_TO_IDX(FOP_FSUB)] = vfp_double_fsub,
				1120	[FOP_TO_IDX(FOP_FDIV)] = vfp_double_fdiv,
				1121	};
				1122
				1123	#define FREG_BANK(x) ((x) & 0x0c)
				1124	#define FREG_IDX(x) ((x) & 3)
				1125
				1126	u32 vfp_double_cpdo(u32 inst, u32 fpscr)
				1127	{
				1128	u32 op = inst & FOP_MASK;
				1129	u32 exceptions = 0;
Catalin Marinas	1356c19	2006-04-10 21:32:46 +0100	[diff] [blame]	1130	unsigned int dd = vfp_get_dd(inst);
				1131	unsigned int dn = vfp_get_dn(inst);
				1132	unsigned int dm = vfp_get_dm(inst);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1133	unsigned int vecitr, veclen, vecstride;
				1134	u32 (*fop)(int, int, s32, u32);
				1135
				1136	veclen = fpscr & FPSCR_LENGTH_MASK;
				1137	vecstride = (1 + ((fpscr & FPSCR_STRIDE_MASK) == FPSCR_STRIDE_MASK)) * 2;
				1138
				1139	/*
				1140	* If destination bank is zero, vector length is always '1'.
				1141	* ARM DDI0100F C5.1.3, C5.3.2.
				1142	*/
				1143	if (FREG_BANK(dd) == 0)
				1144	veclen = 0;
				1145
				1146	pr_debug("VFP: vecstride=%u veclen=%u\n", vecstride,
				1147	(veclen >> FPSCR_LENGTH_BIT) + 1);
				1148
Catalin Marinas	1356c19	2006-04-10 21:32:46 +0100	[diff] [blame]	1149	fop = (op == FOP_EXT) ? fop_extfns[FEXT_TO_IDX(inst)] : fop_fns[FOP_TO_IDX(op)];
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1150	if (!fop)
				1151	goto invalid;
				1152
				1153	for (vecitr = 0; vecitr <= veclen; vecitr += 1 << FPSCR_LENGTH_BIT) {
				1154	u32 except;
				1155
				1156	if (op == FOP_EXT)
Catalin Marinas	1356c19	2006-04-10 21:32:46 +0100	[diff] [blame]	1157	pr_debug("VFP: itr%d (d%u) = op[%u] (d%u)\n",
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1158	vecitr >> FPSCR_LENGTH_BIT,
Catalin Marinas	1356c19	2006-04-10 21:32:46 +0100	[diff] [blame]	1159	dd, dn, dm);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1160	else
Catalin Marinas	1356c19	2006-04-10 21:32:46 +0100	[diff] [blame]	1161	pr_debug("VFP: itr%d (d%u) = (d%u) op[%u] (d%u)\n",
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1162	vecitr >> FPSCR_LENGTH_BIT,
Catalin Marinas	1356c19	2006-04-10 21:32:46 +0100	[diff] [blame]	1163	dd, dn, FOP_TO_IDX(op), dm);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1164
				1165	except = fop(dd, dn, dm, fpscr);
				1166	pr_debug("VFP: itr%d: exceptions=%08x\n",
				1167	vecitr >> FPSCR_LENGTH_BIT, except);
				1168
				1169	exceptions \|= except;
				1170
				1171	/*
				1172	* This ensures that comparisons only operate on scalars;
				1173	* comparisons always return with one FPSCR status bit set.
				1174	*/
				1175	if (except & (FPSCR_N\|FPSCR_Z\|FPSCR_C\|FPSCR_V))
				1176	break;
				1177
				1178	/*
				1179	* CHECK: It appears to be undefined whether we stop when
				1180	* we encounter an exception. We continue.
				1181	*/
				1182
				1183	dd = FREG_BANK(dd) + ((FREG_IDX(dd) + vecstride) & 6);
				1184	dn = FREG_BANK(dn) + ((FREG_IDX(dn) + vecstride) & 6);
				1185	if (FREG_BANK(dm) != 0)
				1186	dm = FREG_BANK(dm) + ((FREG_IDX(dm) + vecstride) & 6);
				1187	}
				1188	return exceptions;
				1189
				1190	invalid:
				1191	return ~0;
				1192	}