Blame - arch/mips/math-emu/sp_maddf.c - kernel/msm-5.4

blob: 6cdaa2a85c9d7a0c5b33bd23eb721ae05ba8e6ed [file] [log] [blame]

Markos Chandras	e24c3be	2015-08-13 09:56:31 +0200	[diff] [blame]	1	/*
				2	* IEEE754 floating point arithmetic
				3	* single precision: MADDF.f (Fused Multiply Add)
				4	* MADDF.fmt: FPR[fd] = FPR[fd] + (FPR[fs] x FPR[ft])
				5	*
				6	* MIPS floating point support
				7	* Copyright (C) 2015 Imagination Technologies, Ltd.
				8	* Author: Markos Chandras <markos.chandras@imgtec.com>
				9	*
				10	* This program is free software; you can distribute it and/or modify it
				11	* under the terms of the GNU General Public License as published by the
				12	* Free Software Foundation; version 2 of the License.
				13	*/
				14
				15	#include "ieee754sp.h"
				16
Paul Burton	6162051	2016-04-21 14:04:49 +0100	[diff] [blame]	17	enum maddf_flags {
				18	maddf_negate_product = 1 << 0,
				19	};
				20
				21	static union ieee754sp _sp_maddf(union ieee754sp z, union ieee754sp x,
				22	union ieee754sp y, enum maddf_flags flags)
Markos Chandras	e24c3be	2015-08-13 09:56:31 +0200	[diff] [blame]	23	{
				24	int re;
				25	int rs;
				26	unsigned rm;
				27	unsigned short lxm;
				28	unsigned short hxm;
				29	unsigned short lym;
				30	unsigned short hym;
				31	unsigned lrm;
				32	unsigned hrm;
				33	unsigned t;
				34	unsigned at;
				35	int s;
				36
				37	COMPXSP;
				38	COMPYSP;
Paul Burton	e2d11e1	2016-04-21 14:04:51 +0100	[diff] [blame]	39	COMPZSP;
Markos Chandras	e24c3be	2015-08-13 09:56:31 +0200	[diff] [blame]	40
				41	EXPLODEXSP;
				42	EXPLODEYSP;
Paul Burton	e2d11e1	2016-04-21 14:04:51 +0100	[diff] [blame]	43	EXPLODEZSP;
Markos Chandras	e24c3be	2015-08-13 09:56:31 +0200	[diff] [blame]	44
				45	FLUSHXSP;
				46	FLUSHYSP;
Paul Burton	e2d11e1	2016-04-21 14:04:51 +0100	[diff] [blame]	47	FLUSHZSP;
Markos Chandras	e24c3be	2015-08-13 09:56:31 +0200	[diff] [blame]	48
				49	ieee754_clearcx();
				50
Aleksandar Markovic	e840be6	2017-07-27 18:08:54 +0200	[diff] [blame^]	51	/*
				52	* Handle the cases when at least one of x, y or z is a NaN.
				53	* Order of precedence is sNaN, qNaN and z, x, y.
				54	*/
				55	if (zc == IEEE754_CLASS_SNAN)
Markos Chandras	e24c3be	2015-08-13 09:56:31 +0200	[diff] [blame]	56	return ieee754sp_nanxcpt(z);
Aleksandar Markovic	e840be6	2017-07-27 18:08:54 +0200	[diff] [blame^]	57	if (xc == IEEE754_CLASS_SNAN)
Markos Chandras	e24c3be	2015-08-13 09:56:31 +0200	[diff] [blame]	58	return ieee754sp_nanxcpt(x);
Aleksandar Markovic	e840be6	2017-07-27 18:08:54 +0200	[diff] [blame^]	59	if (yc == IEEE754_CLASS_SNAN)
				60	return ieee754sp_nanxcpt(y);
				61	if (zc == IEEE754_CLASS_QNAN)
				62	return z;
				63	if (xc == IEEE754_CLASS_QNAN)
				64	return x;
				65	if (yc == IEEE754_CLASS_QNAN)
Markos Chandras	e24c3be	2015-08-13 09:56:31 +0200	[diff] [blame]	66	return y;
				67
Aleksandar Markovic	e840be6	2017-07-27 18:08:54 +0200	[diff] [blame^]	68	if (zc == IEEE754_CLASS_DNORM)
				69	SPDNORMZ;
				70	/* ZERO z cases are handled separately below */
				71
				72	switch (CLPAIR(xc, yc)) {
				73
Markos Chandras	e24c3be	2015-08-13 09:56:31 +0200	[diff] [blame]	74
				75	/*
				76	* Infinity handling
				77	*/
				78	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_ZERO):
				79	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_INF):
Markos Chandras	e24c3be	2015-08-13 09:56:31 +0200	[diff] [blame]	80	ieee754_setcx(IEEE754_INVALID_OPERATION);
				81	return ieee754sp_indef();
				82
				83	case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_INF):
				84	case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_INF):
				85	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_NORM):
				86	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_DNORM):
				87	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF):
Markos Chandras	e24c3be	2015-08-13 09:56:31 +0200	[diff] [blame]	88	return ieee754sp_inf(xs ^ ys);
				89
				90	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO):
				91	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_NORM):
				92	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_DNORM):
				93	case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_ZERO):
				94	case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_ZERO):
				95	if (zc == IEEE754_CLASS_INF)
				96	return ieee754sp_inf(zs);
				97	/* Multiplication is 0 so just return z */
				98	return z;
				99
				100	case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM):
				101	SPDNORMX;
				102
				103	case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM):
Aleksandar Markovic	e840be6	2017-07-27 18:08:54 +0200	[diff] [blame^]	104	if (zc == IEEE754_CLASS_INF)
Markos Chandras	e24c3be	2015-08-13 09:56:31 +0200	[diff] [blame]	105	return ieee754sp_inf(zs);
				106	SPDNORMY;
				107	break;
				108
				109	case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_NORM):
Aleksandar Markovic	e840be6	2017-07-27 18:08:54 +0200	[diff] [blame^]	110	if (zc == IEEE754_CLASS_INF)
Markos Chandras	e24c3be	2015-08-13 09:56:31 +0200	[diff] [blame]	111	return ieee754sp_inf(zs);
				112	SPDNORMX;
				113	break;
				114
				115	case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_NORM):
Aleksandar Markovic	e840be6	2017-07-27 18:08:54 +0200	[diff] [blame^]	116	if (zc == IEEE754_CLASS_INF)
Markos Chandras	e24c3be	2015-08-13 09:56:31 +0200	[diff] [blame]	117	return ieee754sp_inf(zs);
				118	/* fall through to real computations */
				119	}
				120
				121	/* Finally get to do some computation */
				122
				123	/*
				124	* Do the multiplication bit first
				125	*
				126	* rm = xm * ym, re = xe + ye basically
				127	*
				128	* At this point xm and ym should have been normalized.
				129	*/
				130
				131	/* rm = xm * ym, re = xe+ye basically */
				132	assert(xm & SP_HIDDEN_BIT);
				133	assert(ym & SP_HIDDEN_BIT);
				134
				135	re = xe + ye;
				136	rs = xs ^ ys;
Paul Burton	6162051	2016-04-21 14:04:49 +0100	[diff] [blame]	137	if (flags & maddf_negate_product)
				138	rs ^= 1;
Markos Chandras	e24c3be	2015-08-13 09:56:31 +0200	[diff] [blame]	139
				140	/* shunt to top of word */
				141	xm <<= 32 - (SP_FBITS + 1);
				142	ym <<= 32 - (SP_FBITS + 1);
				143
				144	/*
				145	* Multiply 32 bits xm, ym to give high 32 bits rm with stickness.
				146	*/
				147	lxm = xm & 0xffff;
				148	hxm = xm >> 16;
				149	lym = ym & 0xffff;
				150	hym = ym >> 16;
				151
				152	lrm = lxm * lym; /* 16 * 16 => 32 */
				153	hrm = hxm * hym; /* 16 * 16 => 32 */
				154
				155	t = lxm * hym; /* 16 * 16 => 32 */
				156	at = lrm + (t << 16);
				157	hrm += at < lrm;
				158	lrm = at;
				159	hrm = hrm + (t >> 16);
				160
				161	t = hxm * lym; /* 16 * 16 => 32 */
				162	at = lrm + (t << 16);
				163	hrm += at < lrm;
				164	lrm = at;
				165	hrm = hrm + (t >> 16);
				166
				167	rm = hrm \| (lrm != 0);
				168
				169	/*
				170	* Sticky shift down to normal rounding precision.
				171	*/
				172	if ((int) rm < 0) {
				173	rm = (rm >> (32 - (SP_FBITS + 1 + 3))) \|
				174	((rm << (SP_FBITS + 1 + 3)) != 0);
				175	re++;
				176	} else {
				177	rm = (rm >> (32 - (SP_FBITS + 1 + 3 + 1))) \|
				178	((rm << (SP_FBITS + 1 + 3 + 1)) != 0);
				179	}
				180	assert(rm & (SP_HIDDEN_BIT << 3));
				181
Aleksandar Markovic	ddbfff7	2017-06-19 17:50:12 +0200	[diff] [blame]	182	if (zc == IEEE754_CLASS_ZERO)
				183	return ieee754sp_format(rs, re, rm);
				184
Markos Chandras	e24c3be	2015-08-13 09:56:31 +0200	[diff] [blame]	185	/* And now the addition */
				186
				187	assert(zm & SP_HIDDEN_BIT);
				188
				189	/*
				190	* Provide guard,round and stick bit space.
				191	*/
				192	zm <<= 3;
				193
				194	if (ze > re) {
				195	/*
Paul Burton	db57f29	2016-04-21 14:04:54 +0100	[diff] [blame]	196	* Have to shift r fraction right to align.
Markos Chandras	e24c3be	2015-08-13 09:56:31 +0200	[diff] [blame]	197	*/
				198	s = ze - re;
Paul Burton	db57f29	2016-04-21 14:04:54 +0100	[diff] [blame]	199	rm = XSPSRS(rm, s);
				200	re += s;
Markos Chandras	e24c3be	2015-08-13 09:56:31 +0200	[diff] [blame]	201	} else if (re > ze) {
				202	/*
Paul Burton	db57f29	2016-04-21 14:04:54 +0100	[diff] [blame]	203	* Have to shift z fraction right to align.
Markos Chandras	e24c3be	2015-08-13 09:56:31 +0200	[diff] [blame]	204	*/
				205	s = re - ze;
Paul Burton	db57f29	2016-04-21 14:04:54 +0100	[diff] [blame]	206	zm = XSPSRS(zm, s);
				207	ze += s;
Markos Chandras	e24c3be	2015-08-13 09:56:31 +0200	[diff] [blame]	208	}
				209	assert(ze == re);
				210	assert(ze <= SP_EMAX);
				211
				212	if (zs == rs) {
				213	/*
				214	* Generate 28 bit result of adding two 27 bit numbers
				215	* leaving result in zm, zs and ze.
				216	*/
				217	zm = zm + rm;
				218
				219	if (zm >> (SP_FBITS + 1 + 3)) { /* carry out */
Paul Burton	db57f29	2016-04-21 14:04:54 +0100	[diff] [blame]	220	zm = XSPSRS1(zm);
				221	ze++;
Markos Chandras	e24c3be	2015-08-13 09:56:31 +0200	[diff] [blame]	222	}
				223	} else {
				224	if (zm >= rm) {
				225	zm = zm - rm;
				226	} else {
				227	zm = rm - zm;
				228	zs = rs;
				229	}
				230	if (zm == 0)
				231	return ieee754sp_zero(ieee754_csr.rm == FPU_CSR_RD);
				232
				233	/*
				234	* Normalize in extended single precision
				235	*/
				236	while ((zm >> (SP_MBITS + 3)) == 0) {
				237	zm <<= 1;
				238	ze--;
				239	}
				240
				241	}
				242	return ieee754sp_format(zs, ze, zm);
				243	}
Paul Burton	6162051	2016-04-21 14:04:49 +0100	[diff] [blame]	244
				245	union ieee754sp ieee754sp_maddf(union ieee754sp z, union ieee754sp x,
				246	union ieee754sp y)
				247	{
				248	return _sp_maddf(z, x, y, 0);
				249	}
				250
				251	union ieee754sp ieee754sp_msubf(union ieee754sp z, union ieee754sp x,
				252	union ieee754sp y)
				253	{
				254	return _sp_maddf(z, x, y, maddf_negate_product);
				255	}