Blame - arch/mips/math-emu/sp_maddf.c - kernel/msm-4.9

blob: 7195fe785d81a8f0a2dda6c2139b95175321803c [file] [log] [blame]

Markos Chandras	e24c3be	2015-08-13 09:56:31 +0200	[diff] [blame]	1	/*
				2	* IEEE754 floating point arithmetic
				3	* single precision: MADDF.f (Fused Multiply Add)
				4	* MADDF.fmt: FPR[fd] = FPR[fd] + (FPR[fs] x FPR[ft])
				5	*
				6	* MIPS floating point support
				7	* Copyright (C) 2015 Imagination Technologies, Ltd.
				8	* Author: Markos Chandras <markos.chandras@imgtec.com>
				9	*
				10	* This program is free software; you can distribute it and/or modify it
				11	* under the terms of the GNU General Public License as published by the
				12	* Free Software Foundation; version 2 of the License.
				13	*/
				14
				15	#include "ieee754sp.h"
				16
Paul Burton	6162051	2016-04-21 14:04:49 +0100	[diff] [blame]	17
				18	static union ieee754sp _sp_maddf(union ieee754sp z, union ieee754sp x,
				19	union ieee754sp y, enum maddf_flags flags)
Markos Chandras	e24c3be	2015-08-13 09:56:31 +0200	[diff] [blame]	20	{
				21	int re;
				22	int rs;
				23	unsigned rm;
Douglas Leung	d2b488e	2017-07-27 18:08:58 +0200	[diff] [blame]	24	uint64_t rm64;
				25	uint64_t zm64;
Markos Chandras	e24c3be	2015-08-13 09:56:31 +0200	[diff] [blame]	26	int s;
				27
				28	COMPXSP;
				29	COMPYSP;
Paul Burton	e2d11e1	2016-04-21 14:04:51 +0100	[diff] [blame]	30	COMPZSP;
Markos Chandras	e24c3be	2015-08-13 09:56:31 +0200	[diff] [blame]	31
				32	EXPLODEXSP;
				33	EXPLODEYSP;
Paul Burton	e2d11e1	2016-04-21 14:04:51 +0100	[diff] [blame]	34	EXPLODEZSP;
Markos Chandras	e24c3be	2015-08-13 09:56:31 +0200	[diff] [blame]	35
				36	FLUSHXSP;
				37	FLUSHYSP;
Paul Burton	e2d11e1	2016-04-21 14:04:51 +0100	[diff] [blame]	38	FLUSHZSP;
Markos Chandras	e24c3be	2015-08-13 09:56:31 +0200	[diff] [blame]	39
				40	ieee754_clearcx();
				41
Aleksandar Markovic	4f8479c	2017-07-27 18:08:54 +0200	[diff] [blame]	42	/*
				43	* Handle the cases when at least one of x, y or z is a NaN.
				44	* Order of precedence is sNaN, qNaN and z, x, y.
				45	*/
				46	if (zc == IEEE754_CLASS_SNAN)
Markos Chandras	e24c3be	2015-08-13 09:56:31 +0200	[diff] [blame]	47	return ieee754sp_nanxcpt(z);
Aleksandar Markovic	4f8479c	2017-07-27 18:08:54 +0200	[diff] [blame]	48	if (xc == IEEE754_CLASS_SNAN)
Markos Chandras	e24c3be	2015-08-13 09:56:31 +0200	[diff] [blame]	49	return ieee754sp_nanxcpt(x);
Aleksandar Markovic	4f8479c	2017-07-27 18:08:54 +0200	[diff] [blame]	50	if (yc == IEEE754_CLASS_SNAN)
				51	return ieee754sp_nanxcpt(y);
				52	if (zc == IEEE754_CLASS_QNAN)
				53	return z;
				54	if (xc == IEEE754_CLASS_QNAN)
				55	return x;
				56	if (yc == IEEE754_CLASS_QNAN)
Markos Chandras	e24c3be	2015-08-13 09:56:31 +0200	[diff] [blame]	57	return y;
				58
Aleksandar Markovic	4f8479c	2017-07-27 18:08:54 +0200	[diff] [blame]	59	if (zc == IEEE754_CLASS_DNORM)
				60	SPDNORMZ;
				61	/* ZERO z cases are handled separately below */
				62
				63	switch (CLPAIR(xc, yc)) {
				64
Markos Chandras	e24c3be	2015-08-13 09:56:31 +0200	[diff] [blame]	65
				66	/*
				67	* Infinity handling
				68	*/
				69	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_ZERO):
				70	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_INF):
Markos Chandras	e24c3be	2015-08-13 09:56:31 +0200	[diff] [blame]	71	ieee754_setcx(IEEE754_INVALID_OPERATION);
				72	return ieee754sp_indef();
				73
				74	case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_INF):
				75	case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_INF):
				76	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_NORM):
				77	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_DNORM):
				78	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF):
Aleksandar Markovic	8981bca	2017-07-27 18:08:55 +0200	[diff] [blame]	79	if ((zc == IEEE754_CLASS_INF) &&
Aleksandar Markovic	5cabf99	2017-07-27 18:08:57 +0200	[diff] [blame]	80	((!(flags & MADDF_NEGATE_PRODUCT) && (zs != (xs ^ ys))) \|\|
				81	((flags & MADDF_NEGATE_PRODUCT) && (zs == (xs ^ ys))))) {
Aleksandar Markovic	8981bca	2017-07-27 18:08:55 +0200	[diff] [blame]	82	/*
				83	* Cases of addition of infinities with opposite signs
				84	* or subtraction of infinities with same signs.
				85	*/
				86	ieee754_setcx(IEEE754_INVALID_OPERATION);
				87	return ieee754sp_indef();
				88	}
				89	/*
				90	* z is here either not an infinity, or an infinity having the
				91	* same sign as product (x*y) (in case of MADDF.D instruction)
				92	* or product -(x*y) (in MSUBF.D case). The result must be an
				93	* infinity, and its sign is determined only by the value of
Aleksandar Markovic	5cabf99	2017-07-27 18:08:57 +0200	[diff] [blame]	94	* (flags & MADDF_NEGATE_PRODUCT) and the signs of x and y.
Aleksandar Markovic	8981bca	2017-07-27 18:08:55 +0200	[diff] [blame]	95	*/
Aleksandar Markovic	5cabf99	2017-07-27 18:08:57 +0200	[diff] [blame]	96	if (flags & MADDF_NEGATE_PRODUCT)
Aleksandar Markovic	8981bca	2017-07-27 18:08:55 +0200	[diff] [blame]	97	return ieee754sp_inf(1 ^ (xs ^ ys));
				98	else
				99	return ieee754sp_inf(xs ^ ys);
Markos Chandras	e24c3be	2015-08-13 09:56:31 +0200	[diff] [blame]	100
				101	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO):
				102	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_NORM):
				103	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_DNORM):
				104	case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_ZERO):
				105	case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_ZERO):
				106	if (zc == IEEE754_CLASS_INF)
				107	return ieee754sp_inf(zs);
Aleksandar Markovic	d56a9ca	2017-07-27 18:08:56 +0200	[diff] [blame]	108	if (zc == IEEE754_CLASS_ZERO) {
				109	/* Handle cases +0 + (-0) and similar ones. */
Aleksandar Markovic	5cabf99	2017-07-27 18:08:57 +0200	[diff] [blame]	110	if ((!(flags & MADDF_NEGATE_PRODUCT)
Aleksandar Markovic	d56a9ca	2017-07-27 18:08:56 +0200	[diff] [blame]	111	&& (zs == (xs ^ ys))) \|\|
Aleksandar Markovic	5cabf99	2017-07-27 18:08:57 +0200	[diff] [blame]	112	((flags & MADDF_NEGATE_PRODUCT)
Aleksandar Markovic	d56a9ca	2017-07-27 18:08:56 +0200	[diff] [blame]	113	&& (zs != (xs ^ ys))))
				114	/*
				115	* Cases of addition of zeros of equal signs
				116	* or subtraction of zeroes of opposite signs.
				117	* The sign of the resulting zero is in any
				118	* such case determined only by the sign of z.
				119	*/
				120	return z;
				121
				122	return ieee754sp_zero(ieee754_csr.rm == FPU_CSR_RD);
				123	}
				124	/* xy is here 0, and z is not 0, so just return z /
Markos Chandras	e24c3be	2015-08-13 09:56:31 +0200	[diff] [blame]	125	return z;
				126
				127	case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM):
				128	SPDNORMX;
				129
				130	case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM):
Aleksandar Markovic	4f8479c	2017-07-27 18:08:54 +0200	[diff] [blame]	131	if (zc == IEEE754_CLASS_INF)
Markos Chandras	e24c3be	2015-08-13 09:56:31 +0200	[diff] [blame]	132	return ieee754sp_inf(zs);
				133	SPDNORMY;
				134	break;
				135
				136	case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_NORM):
Aleksandar Markovic	4f8479c	2017-07-27 18:08:54 +0200	[diff] [blame]	137	if (zc == IEEE754_CLASS_INF)
Markos Chandras	e24c3be	2015-08-13 09:56:31 +0200	[diff] [blame]	138	return ieee754sp_inf(zs);
				139	SPDNORMX;
				140	break;
				141
				142	case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_NORM):
Aleksandar Markovic	4f8479c	2017-07-27 18:08:54 +0200	[diff] [blame]	143	if (zc == IEEE754_CLASS_INF)
Markos Chandras	e24c3be	2015-08-13 09:56:31 +0200	[diff] [blame]	144	return ieee754sp_inf(zs);
				145	/* fall through to real computations */
				146	}
				147
				148	/* Finally get to do some computation */
				149
				150	/*
				151	* Do the multiplication bit first
				152	*
				153	* rm = xm * ym, re = xe + ye basically
				154	*
				155	* At this point xm and ym should have been normalized.
				156	*/
				157
				158	/* rm = xm * ym, re = xe+ye basically */
				159	assert(xm & SP_HIDDEN_BIT);
				160	assert(ym & SP_HIDDEN_BIT);
				161
				162	re = xe + ye;
				163	rs = xs ^ ys;
Aleksandar Markovic	5cabf99	2017-07-27 18:08:57 +0200	[diff] [blame]	164	if (flags & MADDF_NEGATE_PRODUCT)
Paul Burton	6162051	2016-04-21 14:04:49 +0100	[diff] [blame]	165	rs ^= 1;
Markos Chandras	e24c3be	2015-08-13 09:56:31 +0200	[diff] [blame]	166
Douglas Leung	d2b488e	2017-07-27 18:08:58 +0200	[diff] [blame]	167	/* Multiple 24 bit xm and ym to give 48 bit results */
				168	rm64 = (uint64_t)xm * ym;
Markos Chandras	e24c3be	2015-08-13 09:56:31 +0200	[diff] [blame]	169
Douglas Leung	d2b488e	2017-07-27 18:08:58 +0200	[diff] [blame]	170	/* Shunt to top of word */
				171	rm64 = rm64 << 16;
Markos Chandras	e24c3be	2015-08-13 09:56:31 +0200	[diff] [blame]	172
Douglas Leung	d2b488e	2017-07-27 18:08:58 +0200	[diff] [blame]	173	/* Put explicit bit at bit 62 if necessary */
				174	if ((int64_t) rm64 < 0) {
				175	rm64 = rm64 >> 1;
Markos Chandras	e24c3be	2015-08-13 09:56:31 +0200	[diff] [blame]	176	re++;
Markos Chandras	e24c3be	2015-08-13 09:56:31 +0200	[diff] [blame]	177	}
Markos Chandras	e24c3be	2015-08-13 09:56:31 +0200	[diff] [blame]	178
Douglas Leung	d2b488e	2017-07-27 18:08:58 +0200	[diff] [blame]	179	assert(rm64 & (1 << 62));
				180
				181	if (zc == IEEE754_CLASS_ZERO) {
				182	/*
				183	* Move explicit bit from bit 62 to bit 26 since the
				184	* ieee754sp_format code expects the mantissa to be
				185	* 27 bits wide (24 + 3 rounding bits).
				186	*/
				187	rm = XSPSRS64(rm64, (62 - 26));
Aleksandar Markovic	4e0694a6	2017-06-19 17:50:12 +0200	[diff] [blame]	188	return ieee754sp_format(rs, re, rm);
Douglas Leung	d2b488e	2017-07-27 18:08:58 +0200	[diff] [blame]	189	}
Aleksandar Markovic	4e0694a6	2017-06-19 17:50:12 +0200	[diff] [blame]	190
Douglas Leung	d2b488e	2017-07-27 18:08:58 +0200	[diff] [blame]	191	/* Move explicit bit from bit 23 to bit 62 */
				192	zm64 = (uint64_t)zm << (62 - 23);
				193	assert(zm64 & (1 << 62));
Markos Chandras	e24c3be	2015-08-13 09:56:31 +0200	[diff] [blame]	194
Douglas Leung	d2b488e	2017-07-27 18:08:58 +0200	[diff] [blame]	195	/* Make the exponents the same */
Markos Chandras	e24c3be	2015-08-13 09:56:31 +0200	[diff] [blame]	196	if (ze > re) {
				197	/*
Paul Burton	db57f29	2016-04-21 14:04:54 +0100	[diff] [blame]	198	* Have to shift r fraction right to align.
Markos Chandras	e24c3be	2015-08-13 09:56:31 +0200	[diff] [blame]	199	*/
				200	s = ze - re;
Douglas Leung	d2b488e	2017-07-27 18:08:58 +0200	[diff] [blame]	201	rm64 = XSPSRS64(rm64, s);
Paul Burton	db57f29	2016-04-21 14:04:54 +0100	[diff] [blame]	202	re += s;
Markos Chandras	e24c3be	2015-08-13 09:56:31 +0200	[diff] [blame]	203	} else if (re > ze) {
				204	/*
Paul Burton	db57f29	2016-04-21 14:04:54 +0100	[diff] [blame]	205	* Have to shift z fraction right to align.
Markos Chandras	e24c3be	2015-08-13 09:56:31 +0200	[diff] [blame]	206	*/
				207	s = re - ze;
Douglas Leung	d2b488e	2017-07-27 18:08:58 +0200	[diff] [blame]	208	zm64 = XSPSRS64(zm64, s);
Paul Burton	db57f29	2016-04-21 14:04:54 +0100	[diff] [blame]	209	ze += s;
Markos Chandras	e24c3be	2015-08-13 09:56:31 +0200	[diff] [blame]	210	}
				211	assert(ze == re);
				212	assert(ze <= SP_EMAX);
				213
Douglas Leung	d2b488e	2017-07-27 18:08:58 +0200	[diff] [blame]	214	/* Do the addition */
Markos Chandras	e24c3be	2015-08-13 09:56:31 +0200	[diff] [blame]	215	if (zs == rs) {
				216	/*
Douglas Leung	d2b488e	2017-07-27 18:08:58 +0200	[diff] [blame]	217	* Generate 64 bit result by adding two 63 bit numbers
				218	* leaving result in zm64, zs and ze.
Markos Chandras	e24c3be	2015-08-13 09:56:31 +0200	[diff] [blame]	219	*/
Douglas Leung	d2b488e	2017-07-27 18:08:58 +0200	[diff] [blame]	220	zm64 = zm64 + rm64;
				221	if ((int64_t)zm64 < 0) { /* carry out */
				222	zm64 = XSPSRS1(zm64);
Paul Burton	db57f29	2016-04-21 14:04:54 +0100	[diff] [blame]	223	ze++;
Markos Chandras	e24c3be	2015-08-13 09:56:31 +0200	[diff] [blame]	224	}
				225	} else {
Douglas Leung	d2b488e	2017-07-27 18:08:58 +0200	[diff] [blame]	226	if (zm64 >= rm64) {
				227	zm64 = zm64 - rm64;
Markos Chandras	e24c3be	2015-08-13 09:56:31 +0200	[diff] [blame]	228	} else {
Douglas Leung	d2b488e	2017-07-27 18:08:58 +0200	[diff] [blame]	229	zm64 = rm64 - zm64;
Markos Chandras	e24c3be	2015-08-13 09:56:31 +0200	[diff] [blame]	230	zs = rs;
				231	}
Douglas Leung	d2b488e	2017-07-27 18:08:58 +0200	[diff] [blame]	232	if (zm64 == 0)
Markos Chandras	e24c3be	2015-08-13 09:56:31 +0200	[diff] [blame]	233	return ieee754sp_zero(ieee754_csr.rm == FPU_CSR_RD);
				234
				235	/*
Douglas Leung	d2b488e	2017-07-27 18:08:58 +0200	[diff] [blame]	236	* Put explicit bit at bit 62 if necessary.
Markos Chandras	e24c3be	2015-08-13 09:56:31 +0200	[diff] [blame]	237	*/
Douglas Leung	d2b488e	2017-07-27 18:08:58 +0200	[diff] [blame]	238	while ((zm64 >> 62) == 0) {
				239	zm64 <<= 1;
Markos Chandras	e24c3be	2015-08-13 09:56:31 +0200	[diff] [blame]	240	ze--;
				241	}
Markos Chandras	e24c3be	2015-08-13 09:56:31 +0200	[diff] [blame]	242	}
Douglas Leung	d2b488e	2017-07-27 18:08:58 +0200	[diff] [blame]	243
				244	/*
				245	* Move explicit bit from bit 62 to bit 26 since the
				246	* ieee754sp_format code expects the mantissa to be
				247	* 27 bits wide (24 + 3 rounding bits).
				248	*/
				249	zm = XSPSRS64(zm64, (62 - 26));
				250
Markos Chandras	e24c3be	2015-08-13 09:56:31 +0200	[diff] [blame]	251	return ieee754sp_format(zs, ze, zm);
				252	}
Paul Burton	6162051	2016-04-21 14:04:49 +0100	[diff] [blame]	253
				254	union ieee754sp ieee754sp_maddf(union ieee754sp z, union ieee754sp x,
				255	union ieee754sp y)
				256	{
				257	return _sp_maddf(z, x, y, 0);
				258	}
				259
				260	union ieee754sp ieee754sp_msubf(union ieee754sp z, union ieee754sp x,
				261	union ieee754sp y)
				262	{
Aleksandar Markovic	5cabf99	2017-07-27 18:08:57 +0200	[diff] [blame]	263	return _sp_maddf(z, x, y, MADDF_NEGATE_PRODUCT);
Paul Burton	6162051	2016-04-21 14:04:49 +0100	[diff] [blame]	264	}